// SPDX-License-Identifier: GPL-2.0-only
/*
 * AMD Memory Encryption Support
 *
 * Copyright (C) 2016-2024 Advanced Micro Devices, Inc.
 *
 * Author: Tom Lendacky <thomas.lendacky@amd.com>
 */

#include <linux/linkage.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/dma-direct.h>
#include <linux/swiotlb.h>
#include <linux/mem_encrypt.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/bitops.h>
#include <linux/dma-mapping.h>
#include <linux/cc_platform.h>

#include <asm/tlbflush.h>
#include <asm/fixmap.h>
#include <asm/setup.h>
#include <asm/mem_encrypt.h>
#include <asm/bootparam.h>
#include <asm/set_memory.h>
#include <asm/cacheflush.h>
#include <asm/processor-flags.h>
#include <asm/msr.h>
#include <asm/cmdline.h>
#include <asm/sev.h>
#include <asm/ia32.h>

#include "mm_internal.h"

/*
 * Since SME related variables are set early in the boot process they must
 * reside in the .data section so as not to be zeroed out when the .bss
 * section is later cleared.
 */
u64 sme_me_mask __section(".data") = 0;
u64 sev_status __section(".data") = 0;
u64 sev_check_data __section(".data") = 0;
EXPORT_SYMBOL(sme_me_mask);

/* Buffer used for early in-place encryption by BSP, no locking needed */
static char sme_early_buffer[PAGE_SIZE] __initdata __aligned(PAGE_SIZE);

/*
 * SNP-specific routine which needs to additionally change the page state from
 * private to shared before copying the data from the source to the destination,
 * and restore it after the copy.
 */
static inline void __init snp_memcpy(void *dst, void *src, size_t sz,
				     unsigned long paddr, bool decrypt)
{
	unsigned long npages = PAGE_ALIGN(sz) >> PAGE_SHIFT;

	if (decrypt) {
		/*
		 * @paddr needs to be accessed decrypted, mark the page shared in
		 * the RMP table before copying it.
		 */
		early_snp_set_memory_shared((unsigned long)__va(paddr), paddr, npages);

		memcpy(dst, src, sz);

		/* Restore the page state after the memcpy. */
		early_snp_set_memory_private((unsigned long)__va(paddr), paddr, npages);
	} else {
		/*
		 * @paddr needs to be accessed encrypted, no need for the page
		 * state change.
		 */
		memcpy(dst, src, sz);
	}
}

/*
 * This routine does not change the underlying encryption setting of the
 * page(s) that map this memory. It assumes that eventually the memory is
 * meant to be accessed as either encrypted or decrypted but the contents
 * are currently not in the desired state.
 *
 * This routine follows the steps outlined in the AMD64 Architecture
 * Programmer's Manual Volume 2, Section 7.10.8 Encrypt-in-Place.
 */
static void __init __sme_early_enc_dec(resource_size_t paddr,
				       unsigned long size, bool enc)
{
	void *src, *dst;
	size_t len;

	if (!sme_me_mask)
		return;

	wbinvd();

	/*
	 * There are a limited number of early mapping slots, so map (at most)
	 * one page at a time.
	 */
	while (size) {
		len = min_t(size_t, sizeof(sme_early_buffer), size);

		/*
		 * Create mappings for the current and desired format of
		 * the memory. Use a write-protected mapping for the source.
		 */
		src = enc ? early_memremap_decrypted_wp(paddr, len) :
			    early_memremap_encrypted_wp(paddr, len);

		dst = enc ? early_memremap_encrypted(paddr, len) :
			    early_memremap_decrypted(paddr, len);

		/*
		 * If a mapping can't be obtained to perform the operation,
		 * then eventual access of that area in the desired mode
		 * will cause a crash.
		 */
		BUG_ON(!src || !dst);

		/*
		 * Use a temporary buffer, of cache-line multiple size, to
		 * avoid data corruption as documented in the APM.
		 */
		if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) {
			snp_memcpy(sme_early_buffer, src, len, paddr, enc);
			snp_memcpy(dst, sme_early_buffer, len, paddr, !enc);
		} else {
			memcpy(sme_early_buffer, src, len);
			memcpy(dst, sme_early_buffer, len);
		}

		early_memunmap(dst, len);
		early_memunmap(src, len);

		paddr += len;
		size -= len;
	}
}

void __init sme_early_encrypt(resource_size_t paddr, unsigned long size)
{
	__sme_early_enc_dec(paddr, size, true);
}

void __init sme_early_decrypt(resource_size_t paddr, unsigned long size)
{
	__sme_early_enc_dec(paddr, size, false);
}

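/*
 * Map or unmap a range in PMD-sized chunks using the early page table
 * helper. The mappings are created with the encryption mask cleared so
 * that the range is accessed decrypted.
 */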
static void __init __sme_early_map_unmap_mem(void *vaddr, unsigned long size,
					     bool map)
{
	unsigned long paddr = (unsigned long)vaddr - __PAGE_OFFSET;
	pmdval_t pmd_flags, pmd;

	/* Use early_pmd_flags but remove the encryption mask */
	pmd_flags = __sme_clr(early_pmd_flags);

	do {
		pmd = map ? (paddr & PMD_MASK) + pmd_flags : 0;
		__early_make_pgtable((unsigned long)vaddr, pmd);

		vaddr += PMD_SIZE;
		paddr += PMD_SIZE;
		size = (size <= PMD_SIZE) ? 0 : size - PMD_SIZE;
	} while (size);

	flush_tlb_local();
}

void __init sme_unmap_bootdata(char *real_mode_data)
{
	struct boot_params *boot_data;
	unsigned long cmdline_paddr;

	if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
		return;

	/* Get the command line address before unmapping the real_mode_data */
	boot_data = (struct boot_params *)real_mode_data;
	cmdline_paddr = boot_data->hdr.cmd_line_ptr | ((u64)boot_data->ext_cmd_line_ptr << 32);

	__sme_early_map_unmap_mem(real_mode_data, sizeof(boot_params), false);

	if (!cmdline_paddr)
		return;

	__sme_early_map_unmap_mem(__va(cmdline_paddr), COMMAND_LINE_SIZE, false);
}

void __init sme_map_bootdata(char *real_mode_data)
{
	struct boot_params *boot_data;
	unsigned long cmdline_paddr;

	if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
		return;

	__sme_early_map_unmap_mem(real_mode_data, sizeof(boot_params), true);

	/* Get the command line address after mapping the real_mode_data */
	boot_data = (struct boot_params *)real_mode_data;
	cmdline_paddr = boot_data->hdr.cmd_line_ptr | ((u64)boot_data->ext_cmd_line_ptr << 32);

	if (!cmdline_paddr)
		return;

	__sme_early_map_unmap_mem(__va(cmdline_paddr), COMMAND_LINE_SIZE, true);
}

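/*
 * Return the PFN mapped by @kpte at the given page table @level and,
 * if @ret_prot is provided, the protection bits of the mapping.
 * Returns 0 for an unrecognized level.
 */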
static unsigned long pg_level_to_pfn(int level, pte_t *kpte, pgprot_t *ret_prot)
{
	unsigned long pfn = 0;
	pgprot_t prot;

	switch (level) {
	case PG_LEVEL_4K:
		pfn = pte_pfn(*kpte);
		prot = pte_pgprot(*kpte);
		break;
	case PG_LEVEL_2M:
		pfn = pmd_pfn(*(pmd_t *)kpte);
		prot = pmd_pgprot(*(pmd_t *)kpte);
		break;
	case PG_LEVEL_1G:
		pfn = pud_pfn(*(pud_t *)kpte);
		prot = pud_pgprot(*(pud_t *)kpte);
		break;
	default:
		WARN_ONCE(1, "Invalid level for kpte\n");
		return 0;
	}

	if (ret_prot)
		*ret_prot = prot;

	return pfn;
}

static bool amd_enc_tlb_flush_required(bool enc)
{
	return true;
}

static bool amd_enc_cache_flush_required(void)
{
	return !cpu_feature_enabled(X86_FEATURE_SME_COHERENT);
}

static void enc_dec_hypercall(unsigned long vaddr, unsigned long size, bool enc)
{
#ifdef CONFIG_PARAVIRT
	unsigned long vaddr_end = vaddr + size;

	while (vaddr < vaddr_end) {
		int psize, pmask, level;
		unsigned long pfn;
		pte_t *kpte;

		kpte = lookup_address(vaddr, &level);
		if (!kpte || pte_none(*kpte)) {
			WARN_ONCE(1, "kpte lookup for vaddr\n");
			return;
		}

		pfn = pg_level_to_pfn(level, kpte, NULL);
		if (!pfn)
			continue;

		psize = page_level_size(level);
		pmask = page_level_mask(level);

		notify_page_enc_status_changed(pfn, psize >> PAGE_SHIFT, enc);

		vaddr = (vaddr & pmask) + psize;
	}
#endif
}

static int amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool enc)
{
	/*
	 * To maintain the security guarantees of SEV-SNP guests, make sure
	 * to invalidate the memory before the encryption attribute is cleared.
	 */
	if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP) && !enc)
		snp_set_memory_shared(vaddr, npages);

	return 0;
}

/* Return 0 unconditionally: the return value doesn't matter for the SEV side */
static int amd_enc_status_change_finish(unsigned long vaddr, int npages, bool enc)
{
	/*
	 * After memory is mapped encrypted in the page table, validate it
	 * so that it is consistent with the page table updates.
	 */
	if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP) && enc)
		snp_set_memory_private(vaddr, npages);

	if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
		enc_dec_hypercall(vaddr, npages << PAGE_SHIFT, enc);

	return 0;
}

int prepare_pte_enc(struct pte_enc_desc *d)
{
	pgprot_t old_prot;

	d->pfn = pg_level_to_pfn(d->pte_level, d->kpte, &old_prot);
	if (!d->pfn)
		return 1;

	d->new_pgprot = old_prot;
	if (d->encrypt)
		pgprot_val(d->new_pgprot) |= _PAGE_ENC;
	else
		pgprot_val(d->new_pgprot) &= ~_PAGE_ENC;

	/* If the protection is unchanged then there is nothing to do. */
	if (pgprot_val(old_prot) == pgprot_val(d->new_pgprot))
		return 1;

	d->pa = d->pfn << PAGE_SHIFT;
	d->size = page_level_size(d->pte_level);

	/*
	 * In-place en-/decryption and physical page attribute change
	 * from C=1 to C=0 or vice versa will be performed. Flush the
	 * caches to ensure that data gets accessed with the correct
	 * C-bit.
	 */
	if (d->va)
		clflush_cache_range(d->va, d->size);
	else
		clflush_cache_range(__va(d->pa), d->size);

	return 0;
}

void set_pte_enc_mask(pte_t *kpte, unsigned long pfn, pgprot_t new_prot)
{
	pte_t new_pte;

	/* Change the page encryption mask. */
	new_pte = pfn_pte(pfn, new_prot);
	set_pte_atomic(kpte, new_pte);
}

static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
{
	struct pte_enc_desc d = {
		.kpte = kpte,
		.pte_level = level,
		.encrypt = enc
	};

	if (prepare_pte_enc(&d))
		return;

	/* Encrypt/decrypt the contents in-place */
	if (enc) {
		sme_early_encrypt(d.pa, d.size);
	} else {
		sme_early_decrypt(d.pa, d.size);

		/*
		 * On SNP, the page state change in the RMP table must happen
		 * before the page table updates.
		 */
		early_snp_set_memory_shared((unsigned long)__va(d.pa), d.pa, 1);
	}

	set_pte_enc_mask(kpte, d.pfn, d.new_pgprot);

	/*
	 * If the page is set encrypted in the page table, then update the RMP
	 * table to add this page as private.
	 */
	if (enc)
		early_snp_set_memory_private((unsigned long)__va(d.pa), d.pa, 1);
}

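/*
 * Walk the kernel page tables for the [vaddr, vaddr + size) range and
 * switch both the mappings and the underlying contents between encrypted
 * and decrypted. A large page is converted in one go when the range fully
 * covers it, otherwise it is split first.
 */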
static int __init early_set_memory_enc_dec(unsigned long vaddr,
					   unsigned long size, bool enc)
{
	unsigned long vaddr_end, vaddr_next, start;
	unsigned long psize, pmask;
	int split_page_size_mask;
	int level, ret;
	pte_t *kpte;

	start = vaddr;
	vaddr_next = vaddr;
	vaddr_end = vaddr + size;

	for (; vaddr < vaddr_end; vaddr = vaddr_next) {
		kpte = lookup_address(vaddr, &level);
		if (!kpte || pte_none(*kpte)) {
			ret = 1;
			goto out;
		}

		if (level == PG_LEVEL_4K) {
			__set_clr_pte_enc(kpte, level, enc);
			vaddr_next = (vaddr & PAGE_MASK) + PAGE_SIZE;
			continue;
		}

		psize = page_level_size(level);
		pmask = page_level_mask(level);

		/*
		 * Check whether we can change the large page in one go.
		 * We request a split when the address is not aligned or
		 * the number of pages to set/clear the encryption bit for
		 * is smaller than the number of pages in the large page.
		 */
		if (vaddr == (vaddr & pmask) &&
		    ((vaddr_end - vaddr) >= psize)) {
			__set_clr_pte_enc(kpte, level, enc);
			vaddr_next = (vaddr & pmask) + psize;
			continue;
		}

		/*
		 * The virtual address is part of a larger page, create the next
		 * level page table mapping (4K or 2M). If it is part of a 2M
		 * page then we request a split of the large page into 4K
		 * chunks. A 1GB large page is split into 2M pages.
		 */
		if (level == PG_LEVEL_2M)
			split_page_size_mask = 0;
		else
			split_page_size_mask = 1 << PG_LEVEL_2M;

		/*
		 * kernel_physical_mapping_change() does not flush the TLBs, so
		 * a TLB flush is required after we exit from the for loop.
		 */
		kernel_physical_mapping_change(__pa(vaddr & pmask),
					       __pa((vaddr_end & pmask) + psize),
					       split_page_size_mask);
	}

	ret = 0;

	early_set_mem_enc_dec_hypercall(start, size, enc);
out:
	__flush_tlb_all();
	return ret;
}

int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size)
{
	return early_set_memory_enc_dec(vaddr, size, false);
}

int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size)
{
	return early_set_memory_enc_dec(vaddr, size, true);
}

void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr, unsigned long size, bool enc)
{
	enc_dec_hypercall(vaddr, size, enc);
}

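/*
 * Early SME/SEV setup: propagate the encryption mask into the early page
 * table flags, the supported PTE mask and the protection map, and install
 * the SEV/SNP specific platform callbacks. Nothing to do when memory
 * encryption is not active (sme_me_mask is zero).
 */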
void __init sme_early_init(void)
{
	if (!sme_me_mask)
		return;

	early_pmd_flags = __sme_set(early_pmd_flags);

	__supported_pte_mask = __sme_set(__supported_pte_mask);

	/* Update the protection map with memory encryption mask */
	add_encrypt_protection_map();

	x86_platform.guest.enc_status_change_prepare = amd_enc_status_change_prepare;
	x86_platform.guest.enc_status_change_finish = amd_enc_status_change_finish;
	x86_platform.guest.enc_tlb_flush_required = amd_enc_tlb_flush_required;
	x86_platform.guest.enc_cache_flush_required = amd_enc_cache_flush_required;
	x86_platform.guest.enc_kexec_begin = snp_kexec_begin;
	x86_platform.guest.enc_kexec_finish = snp_kexec_finish;

	/*
	 * AMD-SEV-ES intercepts the RDMSR to read the X2APIC ID in the
	 * parallel bringup low level code. That raises #VC which cannot be
	 * handled there.
	 * It does not provide a RDMSR GHCB protocol so the early startup
	 * code cannot directly communicate with the secure firmware. The
	 * alternative solution to retrieve the APIC ID via CPUID(0xb),
	 * which is covered by the GHCB protocol, is not viable either
	 * because there is no enforcement of the CPUID(0xb) provided
	 * "initial" APIC ID to be the same as the real APIC ID.
	 * Disable parallel bootup.
	 */
	if (sev_status & MSR_AMD64_SEV_ES_ENABLED)
		x86_cpuinit.parallel_bringup = false;

	/*
	 * The VMM is capable of injecting interrupt 0x80 and triggering the
	 * compatibility syscall path.
	 *
	 * By default, the 32-bit emulation is disabled in order to ensure
	 * the safety of the VM.
	 */
	if (sev_status & MSR_AMD64_SEV_ENABLED)
		ia32_disable();

	/*
	 * Override init functions that scan the ROM region in SEV-SNP guests,
	 * as this memory is not pre-validated and would thus cause a crash.
	 */
	if (sev_status & MSR_AMD64_SEV_SNP_ENABLED) {
		x86_init.mpparse.find_mptable = x86_init_noop;
		x86_init.pci.init_irq = x86_init_noop;
		x86_init.resources.probe_roms = x86_init_noop;

		/*
		 * DMI setup behavior for SEV-SNP guests depends on
		 * efi_enabled(EFI_CONFIG_TABLES), which hasn't been
		 * parsed yet. snp_dmi_setup() will run after that
		 * parsing has happened.
		 */
		x86_init.resources.dmi_setup = snp_dmi_setup;
	}

	/*
	 * Switch the SVSM CA mapping (if active) from identity mapped to
	 * kernel mapped.
	 */
	snp_update_svsm_ca();

	if (sev_status & MSR_AMD64_SNP_SECURE_TSC)
		setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
}

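/*
 * Free the unused portion of the decrypted boot data area between
 * __start_bss_decrypted_unused and __end_bss_decrypted, re-encrypting
 * it first when SME is active.
 */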
void __init mem_encrypt_free_decrypted_mem(void)
{
	unsigned long vaddr, vaddr_end, npages;
	int r;

	vaddr = (unsigned long)__start_bss_decrypted_unused;
	vaddr_end = (unsigned long)__end_bss_decrypted;
	npages = (vaddr_end - vaddr) >> PAGE_SHIFT;

	/*
	 * If the unused memory range was mapped decrypted, change the encryption
	 * attribute from decrypted to encrypted before freeing it. Base the
	 * re-encryption on the same condition used for the decryption in
	 * sme_postprocess_startup(). Higher level abstractions, such as
	 * CC_ATTR_MEM_ENCRYPT, aren't necessarily equivalent in a Hyper-V VM
	 * using vTOM, where sme_me_mask is always zero.
	 */
	if (sme_me_mask) {
		r = set_memory_encrypted(vaddr, npages);
		if (r) {
			pr_warn("failed to free unused decrypted pages\n");
			return;
		}
	}

	free_init_pages("unused decrypted", vaddr, vaddr_end);
}