1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (c) 2014, The Linux Foundation. All rights reserved. 4 */ 5 #include <linux/kernel.h> 6 #include <linux/mm.h> 7 #include <linux/module.h> 8 #include <linux/mem_encrypt.h> 9 #include <linux/sched.h> 10 #include <linux/vmalloc.h> 11 #include <linux/pagewalk.h> 12 13 #include <asm/cacheflush.h> 14 #include <asm/pgtable-prot.h> 15 #include <asm/set_memory.h> 16 #include <asm/tlbflush.h> 17 #include <asm/kfence.h> 18 19 struct page_change_data { 20 pgprot_t set_mask; 21 pgprot_t clear_mask; 22 }; 23 24 static ptdesc_t set_pageattr_masks(ptdesc_t val, struct mm_walk *walk) 25 { 26 struct page_change_data *masks = walk->private; 27 28 val &= ~(pgprot_val(masks->clear_mask)); 29 val |= (pgprot_val(masks->set_mask)); 30 31 return val; 32 } 33 34 static int pageattr_pud_entry(pud_t *pud, unsigned long addr, 35 unsigned long next, struct mm_walk *walk) 36 { 37 pud_t val = pudp_get(pud); 38 39 if (pud_sect(val)) { 40 if (WARN_ON_ONCE((next - addr) != PUD_SIZE)) 41 return -EINVAL; 42 val = __pud(set_pageattr_masks(pud_val(val), walk)); 43 set_pud(pud, val); 44 walk->action = ACTION_CONTINUE; 45 } 46 47 return 0; 48 } 49 50 static int pageattr_pmd_entry(pmd_t *pmd, unsigned long addr, 51 unsigned long next, struct mm_walk *walk) 52 { 53 pmd_t val = pmdp_get(pmd); 54 55 if (pmd_sect(val)) { 56 if (WARN_ON_ONCE((next - addr) != PMD_SIZE)) 57 return -EINVAL; 58 val = __pmd(set_pageattr_masks(pmd_val(val), walk)); 59 set_pmd(pmd, val); 60 walk->action = ACTION_CONTINUE; 61 } 62 63 return 0; 64 } 65 66 static int pageattr_pte_entry(pte_t *pte, unsigned long addr, 67 unsigned long next, struct mm_walk *walk) 68 { 69 pte_t val = __ptep_get(pte); 70 71 val = __pte(set_pageattr_masks(pte_val(val), walk)); 72 __set_pte(pte, val); 73 74 return 0; 75 } 76 77 static const struct mm_walk_ops pageattr_ops = { 78 .pud_entry = pageattr_pud_entry, 79 .pmd_entry = pageattr_pmd_entry, 80 .pte_entry = pageattr_pte_entry, 81 }; 82 83 bool rodata_full __ro_after_init = true; 84 85 bool can_set_direct_map(void) 86 { 87 /* 88 * rodata_full, DEBUG_PAGEALLOC and a Realm guest all require linear 89 * map to be mapped at page granularity, so that it is possible to 90 * protect/unprotect single pages. 91 * 92 * KFENCE pool requires page-granular mapping if initialized late. 93 * 94 * Realms need to make pages shared/protected at page granularity. 95 */ 96 return rodata_full || debug_pagealloc_enabled() || 97 arm64_kfence_can_set_direct_map() || is_realm_world(); 98 } 99 100 static int update_range_prot(unsigned long start, unsigned long size, 101 pgprot_t set_mask, pgprot_t clear_mask) 102 { 103 struct page_change_data data; 104 int ret; 105 106 data.set_mask = set_mask; 107 data.clear_mask = clear_mask; 108 109 ret = split_kernel_leaf_mapping(start, start + size); 110 if (WARN_ON_ONCE(ret)) 111 return ret; 112 113 arch_enter_lazy_mmu_mode(); 114 115 /* 116 * The caller must ensure that the range we are operating on does not 117 * partially overlap a block mapping, or a cont mapping. Any such case 118 * must be eliminated by splitting the mapping. 119 */ 120 ret = walk_kernel_page_table_range_lockless(start, start + size, 121 &pageattr_ops, NULL, &data); 122 arch_leave_lazy_mmu_mode(); 123 124 return ret; 125 } 126 127 static int __change_memory_common(unsigned long start, unsigned long size, 128 pgprot_t set_mask, pgprot_t clear_mask) 129 { 130 int ret; 131 132 ret = update_range_prot(start, size, set_mask, clear_mask); 133 134 /* 135 * If the memory is being made valid without changing any other bits 136 * then a TLBI isn't required as a non-valid entry cannot be cached in 137 * the TLB. 138 */ 139 if (pgprot_val(set_mask) != PTE_VALID || pgprot_val(clear_mask)) 140 flush_tlb_kernel_range(start, start + size); 141 return ret; 142 } 143 144 static int change_memory_common(unsigned long addr, int numpages, 145 pgprot_t set_mask, pgprot_t clear_mask) 146 { 147 unsigned long start = addr; 148 unsigned long size = PAGE_SIZE * numpages; 149 unsigned long end = start + size; 150 struct vm_struct *area; 151 int i; 152 153 if (!PAGE_ALIGNED(addr)) { 154 start &= PAGE_MASK; 155 end = start + size; 156 WARN_ON_ONCE(1); 157 } 158 159 /* 160 * Kernel VA mappings are always live, and splitting live section 161 * mappings into page mappings may cause TLB conflicts. This means 162 * we have to ensure that changing the permission bits of the range 163 * we are operating on does not result in such splitting. 164 * 165 * Let's restrict ourselves to mappings created by vmalloc (or vmap). 166 * Disallow VM_ALLOW_HUGE_VMAP mappings to guarantee that only page 167 * mappings are updated and splitting is never needed. 168 * 169 * So check whether the [addr, addr + size) interval is entirely 170 * covered by precisely one VM area that has the VM_ALLOC flag set. 171 */ 172 area = find_vm_area((void *)addr); 173 if (!area || 174 end > (unsigned long)kasan_reset_tag(area->addr) + area->size || 175 ((area->flags & (VM_ALLOC | VM_ALLOW_HUGE_VMAP)) != VM_ALLOC)) 176 return -EINVAL; 177 178 if (!numpages) 179 return 0; 180 181 /* 182 * If we are manipulating read-only permissions, apply the same 183 * change to the linear mapping of the pages that back this VM area. 184 */ 185 if (rodata_full && (pgprot_val(set_mask) == PTE_RDONLY || 186 pgprot_val(clear_mask) == PTE_RDONLY)) { 187 for (i = 0; i < area->nr_pages; i++) { 188 __change_memory_common((u64)page_address(area->pages[i]), 189 PAGE_SIZE, set_mask, clear_mask); 190 } 191 } 192 193 /* 194 * Get rid of potentially aliasing lazily unmapped vm areas that may 195 * have permissions set that deviate from the ones we are setting here. 196 */ 197 vm_unmap_aliases(); 198 199 return __change_memory_common(start, size, set_mask, clear_mask); 200 } 201 202 int set_memory_ro(unsigned long addr, int numpages) 203 { 204 return change_memory_common(addr, numpages, 205 __pgprot(PTE_RDONLY), 206 __pgprot(PTE_WRITE)); 207 } 208 209 int set_memory_rw(unsigned long addr, int numpages) 210 { 211 return change_memory_common(addr, numpages, 212 __pgprot(PTE_WRITE), 213 __pgprot(PTE_RDONLY)); 214 } 215 216 int set_memory_nx(unsigned long addr, int numpages) 217 { 218 return change_memory_common(addr, numpages, 219 __pgprot(PTE_PXN), 220 __pgprot(PTE_MAYBE_GP)); 221 } 222 223 int set_memory_x(unsigned long addr, int numpages) 224 { 225 return change_memory_common(addr, numpages, 226 __pgprot(PTE_MAYBE_GP), 227 __pgprot(PTE_PXN)); 228 } 229 230 int set_memory_valid(unsigned long addr, int numpages, int enable) 231 { 232 if (enable) 233 return __change_memory_common(addr, PAGE_SIZE * numpages, 234 __pgprot(PTE_VALID), 235 __pgprot(0)); 236 else 237 return __change_memory_common(addr, PAGE_SIZE * numpages, 238 __pgprot(0), 239 __pgprot(PTE_VALID)); 240 } 241 242 int set_direct_map_invalid_noflush(struct page *page) 243 { 244 pgprot_t clear_mask = __pgprot(PTE_VALID); 245 pgprot_t set_mask = __pgprot(0); 246 247 if (!can_set_direct_map()) 248 return 0; 249 250 return update_range_prot((unsigned long)page_address(page), 251 PAGE_SIZE, set_mask, clear_mask); 252 } 253 254 int set_direct_map_default_noflush(struct page *page) 255 { 256 pgprot_t set_mask = __pgprot(PTE_VALID | PTE_WRITE); 257 pgprot_t clear_mask = __pgprot(PTE_RDONLY); 258 259 if (!can_set_direct_map()) 260 return 0; 261 262 return update_range_prot((unsigned long)page_address(page), 263 PAGE_SIZE, set_mask, clear_mask); 264 } 265 266 static int __set_memory_enc_dec(unsigned long addr, 267 int numpages, 268 bool encrypt) 269 { 270 unsigned long set_prot = 0, clear_prot = 0; 271 phys_addr_t start, end; 272 int ret; 273 274 if (!is_realm_world()) 275 return 0; 276 277 if (!__is_lm_address(addr)) 278 return -EINVAL; 279 280 start = __virt_to_phys(addr); 281 end = start + numpages * PAGE_SIZE; 282 283 if (encrypt) 284 clear_prot = PROT_NS_SHARED; 285 else 286 set_prot = PROT_NS_SHARED; 287 288 /* 289 * Break the mapping before we make any changes to avoid stale TLB 290 * entries or Synchronous External Aborts caused by RIPAS_EMPTY 291 */ 292 ret = __change_memory_common(addr, PAGE_SIZE * numpages, 293 __pgprot(set_prot), 294 __pgprot(clear_prot | PTE_VALID)); 295 296 if (ret) 297 return ret; 298 299 if (encrypt) 300 ret = rsi_set_memory_range_protected(start, end); 301 else 302 ret = rsi_set_memory_range_shared(start, end); 303 304 if (ret) 305 return ret; 306 307 return __change_memory_common(addr, PAGE_SIZE * numpages, 308 __pgprot(PTE_VALID), 309 __pgprot(0)); 310 } 311 312 static int realm_set_memory_encrypted(unsigned long addr, int numpages) 313 { 314 int ret = __set_memory_enc_dec(addr, numpages, true); 315 316 /* 317 * If the request to change state fails, then the only sensible cause 318 * of action for the caller is to leak the memory 319 */ 320 WARN(ret, "Failed to encrypt memory, %d pages will be leaked", 321 numpages); 322 323 return ret; 324 } 325 326 static int realm_set_memory_decrypted(unsigned long addr, int numpages) 327 { 328 int ret = __set_memory_enc_dec(addr, numpages, false); 329 330 WARN(ret, "Failed to decrypt memory, %d pages will be leaked", 331 numpages); 332 333 return ret; 334 } 335 336 static const struct arm64_mem_crypt_ops realm_crypt_ops = { 337 .encrypt = realm_set_memory_encrypted, 338 .decrypt = realm_set_memory_decrypted, 339 }; 340 341 int realm_register_memory_enc_ops(void) 342 { 343 return arm64_mem_crypt_ops_register(&realm_crypt_ops); 344 } 345 346 int set_direct_map_valid_noflush(struct page *page, unsigned nr, bool valid) 347 { 348 unsigned long addr = (unsigned long)page_address(page); 349 350 if (!can_set_direct_map()) 351 return 0; 352 353 return set_memory_valid(addr, nr, valid); 354 } 355 356 #ifdef CONFIG_DEBUG_PAGEALLOC 357 /* 358 * This is - apart from the return value - doing the same 359 * thing as the new set_direct_map_valid_noflush() function. 360 * 361 * Unify? Explain the conceptual differences? 362 */ 363 void __kernel_map_pages(struct page *page, int numpages, int enable) 364 { 365 if (!can_set_direct_map()) 366 return; 367 368 set_memory_valid((unsigned long)page_address(page), numpages, enable); 369 } 370 #endif /* CONFIG_DEBUG_PAGEALLOC */ 371 372 /* 373 * This function is used to determine if a linear map page has been marked as 374 * not-valid. Walk the page table and check the PTE_VALID bit. 375 * 376 * Because this is only called on the kernel linear map, p?d_sect() implies 377 * p?d_present(). When debug_pagealloc is enabled, sections mappings are 378 * disabled. 379 */ 380 bool kernel_page_present(struct page *page) 381 { 382 pgd_t *pgdp; 383 p4d_t *p4dp; 384 pud_t *pudp, pud; 385 pmd_t *pmdp, pmd; 386 pte_t *ptep; 387 unsigned long addr = (unsigned long)page_address(page); 388 389 pgdp = pgd_offset_k(addr); 390 if (pgd_none(READ_ONCE(*pgdp))) 391 return false; 392 393 p4dp = p4d_offset(pgdp, addr); 394 if (p4d_none(READ_ONCE(*p4dp))) 395 return false; 396 397 pudp = pud_offset(p4dp, addr); 398 pud = READ_ONCE(*pudp); 399 if (pud_none(pud)) 400 return false; 401 if (pud_sect(pud)) 402 return true; 403 404 pmdp = pmd_offset(pudp, addr); 405 pmd = READ_ONCE(*pmdp); 406 if (pmd_none(pmd)) 407 return false; 408 if (pmd_sect(pmd)) 409 return true; 410 411 ptep = pte_offset_kernel(pmdp, addr); 412 return pte_valid(__ptep_get(ptep)); 413 } 414