1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (c) 2014, The Linux Foundation. All rights reserved. 4 */ 5 #include <linux/kernel.h> 6 #include <linux/mm.h> 7 #include <linux/module.h> 8 #include <linux/mem_encrypt.h> 9 #include <linux/sched.h> 10 #include <linux/vmalloc.h> 11 12 #include <asm/cacheflush.h> 13 #include <asm/pgtable-prot.h> 14 #include <asm/set_memory.h> 15 #include <asm/tlbflush.h> 16 #include <asm/kfence.h> 17 18 struct page_change_data { 19 pgprot_t set_mask; 20 pgprot_t clear_mask; 21 }; 22 23 bool rodata_full __ro_after_init = IS_ENABLED(CONFIG_RODATA_FULL_DEFAULT_ENABLED); 24 25 bool can_set_direct_map(void) 26 { 27 /* 28 * rodata_full, DEBUG_PAGEALLOC and a Realm guest all require linear 29 * map to be mapped at page granularity, so that it is possible to 30 * protect/unprotect single pages. 31 * 32 * KFENCE pool requires page-granular mapping if initialized late. 33 * 34 * Realms need to make pages shared/protected at page granularity. 35 */ 36 return rodata_full || debug_pagealloc_enabled() || 37 arm64_kfence_can_set_direct_map() || is_realm_world(); 38 } 39 40 static int change_page_range(pte_t *ptep, unsigned long addr, void *data) 41 { 42 struct page_change_data *cdata = data; 43 pte_t pte = __ptep_get(ptep); 44 45 pte = clear_pte_bit(pte, cdata->clear_mask); 46 pte = set_pte_bit(pte, cdata->set_mask); 47 48 __set_pte(ptep, pte); 49 return 0; 50 } 51 52 /* 53 * This function assumes that the range is mapped with PAGE_SIZE pages. 54 */ 55 static int __change_memory_common(unsigned long start, unsigned long size, 56 pgprot_t set_mask, pgprot_t clear_mask) 57 { 58 struct page_change_data data; 59 int ret; 60 61 data.set_mask = set_mask; 62 data.clear_mask = clear_mask; 63 64 ret = apply_to_page_range(&init_mm, start, size, change_page_range, 65 &data); 66 67 /* 68 * If the memory is being made valid without changing any other bits 69 * then a TLBI isn't required as a non-valid entry cannot be cached in 70 * the TLB. 71 */ 72 if (pgprot_val(set_mask) != PTE_VALID || pgprot_val(clear_mask)) 73 flush_tlb_kernel_range(start, start + size); 74 return ret; 75 } 76 77 static int change_memory_common(unsigned long addr, int numpages, 78 pgprot_t set_mask, pgprot_t clear_mask) 79 { 80 unsigned long start = addr; 81 unsigned long size = PAGE_SIZE * numpages; 82 unsigned long end = start + size; 83 struct vm_struct *area; 84 int i; 85 86 if (!PAGE_ALIGNED(addr)) { 87 start &= PAGE_MASK; 88 end = start + size; 89 WARN_ON_ONCE(1); 90 } 91 92 /* 93 * Kernel VA mappings are always live, and splitting live section 94 * mappings into page mappings may cause TLB conflicts. This means 95 * we have to ensure that changing the permission bits of the range 96 * we are operating on does not result in such splitting. 97 * 98 * Let's restrict ourselves to mappings created by vmalloc (or vmap). 99 * Those are guaranteed to consist entirely of page mappings, and 100 * splitting is never needed. 101 * 102 * So check whether the [addr, addr + size) interval is entirely 103 * covered by precisely one VM area that has the VM_ALLOC flag set. 104 */ 105 area = find_vm_area((void *)addr); 106 if (!area || 107 end > (unsigned long)kasan_reset_tag(area->addr) + area->size || 108 !(area->flags & VM_ALLOC)) 109 return -EINVAL; 110 111 if (!numpages) 112 return 0; 113 114 /* 115 * If we are manipulating read-only permissions, apply the same 116 * change to the linear mapping of the pages that back this VM area. 117 */ 118 if (rodata_full && (pgprot_val(set_mask) == PTE_RDONLY || 119 pgprot_val(clear_mask) == PTE_RDONLY)) { 120 for (i = 0; i < area->nr_pages; i++) { 121 __change_memory_common((u64)page_address(area->pages[i]), 122 PAGE_SIZE, set_mask, clear_mask); 123 } 124 } 125 126 /* 127 * Get rid of potentially aliasing lazily unmapped vm areas that may 128 * have permissions set that deviate from the ones we are setting here. 129 */ 130 vm_unmap_aliases(); 131 132 return __change_memory_common(start, size, set_mask, clear_mask); 133 } 134 135 int set_memory_ro(unsigned long addr, int numpages) 136 { 137 return change_memory_common(addr, numpages, 138 __pgprot(PTE_RDONLY), 139 __pgprot(PTE_WRITE)); 140 } 141 142 int set_memory_rw(unsigned long addr, int numpages) 143 { 144 return change_memory_common(addr, numpages, 145 __pgprot(PTE_WRITE), 146 __pgprot(PTE_RDONLY)); 147 } 148 149 int set_memory_nx(unsigned long addr, int numpages) 150 { 151 return change_memory_common(addr, numpages, 152 __pgprot(PTE_PXN), 153 __pgprot(PTE_MAYBE_GP)); 154 } 155 156 int set_memory_x(unsigned long addr, int numpages) 157 { 158 return change_memory_common(addr, numpages, 159 __pgprot(PTE_MAYBE_GP), 160 __pgprot(PTE_PXN)); 161 } 162 163 int set_memory_valid(unsigned long addr, int numpages, int enable) 164 { 165 if (enable) 166 return __change_memory_common(addr, PAGE_SIZE * numpages, 167 __pgprot(PTE_VALID), 168 __pgprot(0)); 169 else 170 return __change_memory_common(addr, PAGE_SIZE * numpages, 171 __pgprot(0), 172 __pgprot(PTE_VALID)); 173 } 174 175 int set_direct_map_invalid_noflush(struct page *page) 176 { 177 struct page_change_data data = { 178 .set_mask = __pgprot(0), 179 .clear_mask = __pgprot(PTE_VALID), 180 }; 181 182 if (!can_set_direct_map()) 183 return 0; 184 185 return apply_to_page_range(&init_mm, 186 (unsigned long)page_address(page), 187 PAGE_SIZE, change_page_range, &data); 188 } 189 190 int set_direct_map_default_noflush(struct page *page) 191 { 192 struct page_change_data data = { 193 .set_mask = __pgprot(PTE_VALID | PTE_WRITE), 194 .clear_mask = __pgprot(PTE_RDONLY), 195 }; 196 197 if (!can_set_direct_map()) 198 return 0; 199 200 return apply_to_page_range(&init_mm, 201 (unsigned long)page_address(page), 202 PAGE_SIZE, change_page_range, &data); 203 } 204 205 static int __set_memory_enc_dec(unsigned long addr, 206 int numpages, 207 bool encrypt) 208 { 209 unsigned long set_prot = 0, clear_prot = 0; 210 phys_addr_t start, end; 211 int ret; 212 213 if (!is_realm_world()) 214 return 0; 215 216 if (!__is_lm_address(addr)) 217 return -EINVAL; 218 219 start = __virt_to_phys(addr); 220 end = start + numpages * PAGE_SIZE; 221 222 if (encrypt) 223 clear_prot = PROT_NS_SHARED; 224 else 225 set_prot = PROT_NS_SHARED; 226 227 /* 228 * Break the mapping before we make any changes to avoid stale TLB 229 * entries or Synchronous External Aborts caused by RIPAS_EMPTY 230 */ 231 ret = __change_memory_common(addr, PAGE_SIZE * numpages, 232 __pgprot(set_prot), 233 __pgprot(clear_prot | PTE_VALID)); 234 235 if (ret) 236 return ret; 237 238 if (encrypt) 239 ret = rsi_set_memory_range_protected(start, end); 240 else 241 ret = rsi_set_memory_range_shared(start, end); 242 243 if (ret) 244 return ret; 245 246 return __change_memory_common(addr, PAGE_SIZE * numpages, 247 __pgprot(PTE_VALID), 248 __pgprot(0)); 249 } 250 251 static int realm_set_memory_encrypted(unsigned long addr, int numpages) 252 { 253 int ret = __set_memory_enc_dec(addr, numpages, true); 254 255 /* 256 * If the request to change state fails, then the only sensible cause 257 * of action for the caller is to leak the memory 258 */ 259 WARN(ret, "Failed to encrypt memory, %d pages will be leaked", 260 numpages); 261 262 return ret; 263 } 264 265 static int realm_set_memory_decrypted(unsigned long addr, int numpages) 266 { 267 int ret = __set_memory_enc_dec(addr, numpages, false); 268 269 WARN(ret, "Failed to decrypt memory, %d pages will be leaked", 270 numpages); 271 272 return ret; 273 } 274 275 static const struct arm64_mem_crypt_ops realm_crypt_ops = { 276 .encrypt = realm_set_memory_encrypted, 277 .decrypt = realm_set_memory_decrypted, 278 }; 279 280 int realm_register_memory_enc_ops(void) 281 { 282 return arm64_mem_crypt_ops_register(&realm_crypt_ops); 283 } 284 285 #ifdef CONFIG_DEBUG_PAGEALLOC 286 void __kernel_map_pages(struct page *page, int numpages, int enable) 287 { 288 if (!can_set_direct_map()) 289 return; 290 291 set_memory_valid((unsigned long)page_address(page), numpages, enable); 292 } 293 #endif /* CONFIG_DEBUG_PAGEALLOC */ 294 295 /* 296 * This function is used to determine if a linear map page has been marked as 297 * not-valid. Walk the page table and check the PTE_VALID bit. 298 * 299 * Because this is only called on the kernel linear map, p?d_sect() implies 300 * p?d_present(). When debug_pagealloc is enabled, sections mappings are 301 * disabled. 302 */ 303 bool kernel_page_present(struct page *page) 304 { 305 pgd_t *pgdp; 306 p4d_t *p4dp; 307 pud_t *pudp, pud; 308 pmd_t *pmdp, pmd; 309 pte_t *ptep; 310 unsigned long addr = (unsigned long)page_address(page); 311 312 pgdp = pgd_offset_k(addr); 313 if (pgd_none(READ_ONCE(*pgdp))) 314 return false; 315 316 p4dp = p4d_offset(pgdp, addr); 317 if (p4d_none(READ_ONCE(*p4dp))) 318 return false; 319 320 pudp = pud_offset(p4dp, addr); 321 pud = READ_ONCE(*pudp); 322 if (pud_none(pud)) 323 return false; 324 if (pud_sect(pud)) 325 return true; 326 327 pmdp = pmd_offset(pudp, addr); 328 pmd = READ_ONCE(*pmdp); 329 if (pmd_none(pmd)) 330 return false; 331 if (pmd_sect(pmd)) 332 return true; 333 334 ptep = pte_offset_kernel(pmdp, addr); 335 return pte_valid(__ptep_get(ptep)); 336 } 337