// SPDX-License-Identifier: GPL-2.0

/*
 * Early x86-64 startup: fix up the statically constructed kernel page
 * tables for the actual load address, build the transient 1:1 (identity)
 * mapping used during the switch to the kernel virtual address space, and
 * apply SME/SNP memory-encryption adjustments.
 *
 * NOTE(review): everything here runs from the early identity mapping,
 * before the normal kernel mapping is active — see the PIC/rip_rel_ptr()
 * comment above __startup_64() below.
 */

#include <linux/init.h>
#include <linux/linkage.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/pgtable.h>

#include <asm/init.h>
#include <asm/sections.h>
#include <asm/setup.h>
#include <asm/sev.h>

/* Scratch page tables consumed below while building the 1:1 mapping. */
extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD];
/* Index of the next free entry in early_dynamic_pgts[]. */
extern unsigned int next_early_pgt;

/*
 * Report whether 5-level paging (LA57) is active, and if so record the
 * 5-level geometry in the globals read by the pgtable accessors.
 *
 * Returns: true when CR4.LA57 is set, false otherwise.
 */
static inline bool check_la57_support(void)
{
	/*
	 * 5-level paging is detected and enabled at kernel decompression
	 * stage. Only check if it has been enabled there.
	 */
	if (!(native_read_cr4() & X86_CR4_LA57))
		return false;

	/*
	 * Publish the 5-level layout: with LA57, the PGD covers 57 bits
	 * (pgdir_shift 48) and each P4D level holds 512 entries.
	 */
	__pgtable_l5_enabled = 1;
	pgdir_shift = 48;
	ptrs_per_p4d = 512;

	return true;
}

/*
 * Perform SME-related fixups after the kernel mapping has been set up:
 * encrypt the kernel image (when SME is active) and strip the encryption
 * mask from the PMD entries covering the .bss..decrypted section so that
 * region stays shared/unencrypted.
 *
 * @bp:         boot parameters, passed through to sme_encrypt_kernel()
 * @pmd:        PMD table whose entries map the kernel image
 * @p2v_offset: physical-to-virtual offset used to index @pmd by paddr
 *
 * Returns: the SME encryption mask (0 when SME is inactive), to be OR-ed
 * into the initial CR3 value by the caller.
 */
static unsigned long __head sme_postprocess_startup(struct boot_params *bp,
						    pmdval_t *pmd,
						    unsigned long p2v_offset)
{
	unsigned long paddr, paddr_end;
	int i;

	/* Encrypt the kernel and related (if SME is active) */
	sme_encrypt_kernel(bp);

	/*
	 * Clear the memory encryption mask from the .bss..decrypted section.
	 * The bss section will be memset to zero later in the initialization so
	 * there is no need to zero it after changing the memory encryption
	 * attribute.
	 */
	if (sme_get_me_mask()) {
		paddr = (unsigned long)rip_rel_ptr(__start_bss_decrypted);
		paddr_end = (unsigned long)rip_rel_ptr(__end_bss_decrypted);

		for (; paddr < paddr_end; paddr += PMD_SIZE) {
			/*
			 * On SNP, transition the page to shared in the RMP table so that
			 * it is consistent with the page table attribute change.
			 *
			 * __start_bss_decrypted has a virtual address in the high range
			 * mapping (kernel .text). PVALIDATE, by way of
			 * early_snp_set_memory_shared(), requires a valid virtual
			 * address but the kernel is currently running off of the identity
			 * mapping so use the PA to get a *currently* valid virtual address.
			 */
			early_snp_set_memory_shared(paddr, paddr, PTRS_PER_PMD);

			/* Drop the encryption bit from the matching PMD entry. */
			i = pmd_index(paddr - p2v_offset);
			pmd[i] -= sme_get_me_mask();
		}
	}

	/*
	 * Return the SME encryption mask (if SME is active) to be used as a
	 * modifier for the initial pgdir entry programmed into CR3.
	 */
	return sme_get_me_mask();
}

/*
 * This code is compiled using PIC codegen because it will execute from the
 * early 1:1 mapping of memory, which deviates from the mapping expected by the
 * linker. Due to this deviation, taking the address of a global variable will
 * produce an ambiguous result when using the plain & operator. Instead,
 * rip_rel_ptr() must be used, which will return the RIP-relative address in
 * the 1:1 mapping of memory. Kernel virtual addresses can be determined by
 * subtracting p2v_offset from the RIP-relative address.
 *
 * @p2v_offset: offset from the kernel's physical load address to its
 *              linked virtual address
 * @bp:         boot parameters from the bootloader
 *
 * Returns: the SME encryption mask to be applied to the initial CR3 value
 * (via sme_postprocess_startup(); 0 when SME is inactive).
 */
unsigned long __head __startup_64(unsigned long p2v_offset,
				  struct boot_params *bp)
{
	pmd_t (*early_pgts)[PTRS_PER_PMD] = rip_rel_ptr(early_dynamic_pgts);
	unsigned long physaddr = (unsigned long)rip_rel_ptr(_text);
	unsigned long va_text, va_end;
	unsigned long pgtable_flags;
	unsigned long load_delta;
	pgdval_t *pgd;
	p4dval_t *p4d;
	pudval_t *pud;
	pmdval_t *pmd, pmd_entry;
	bool la57;
	int i;

	la57 = check_la57_support();

	/* Is the address too large? */
	if (physaddr >> MAX_PHYSMEM_BITS)
		for (;;); /* hang: too early in boot to report an error */

	/*
	 * Compute the delta between the address I am compiled to run at
	 * and the address I am actually running at.
	 */
	phys_base = load_delta = __START_KERNEL_map + p2v_offset;

	/* Is the address not 2M aligned? */
	if (load_delta & ~PMD_MASK)
		for (;;); /* hang: too early in boot to report an error */

	/* Virtual start/end of the kernel image. */
	va_text = physaddr - p2v_offset;
	va_end = (unsigned long)rip_rel_ptr(_end) - p2v_offset;

	/* Include the SME encryption mask in the fixup value */
	load_delta += sme_get_me_mask();

	/* Fixup the physical addresses in the page table */

	pgd = rip_rel_ptr(early_top_pgt);
	pgd[pgd_index(__START_KERNEL_map)] += load_delta;

	if (la57) {
		/* With 5-level paging, interpose level4_kernel_pgt as the P4D. */
		p4d = (p4dval_t *)rip_rel_ptr(level4_kernel_pgt);
		p4d[MAX_PTRS_PER_P4D - 1] += load_delta;

		pgd[pgd_index(__START_KERNEL_map)] = (pgdval_t)p4d | _PAGE_TABLE;
	}

	/* Relocate the kernel PUD entries (top two slots of level3_kernel_pgt). */
	level3_kernel_pgt[PTRS_PER_PUD - 2].pud += load_delta;
	level3_kernel_pgt[PTRS_PER_PUD - 1].pud += load_delta;

	/* Relocate the fixmap PMD entries. */
	for (i = FIXMAP_PMD_TOP; i > FIXMAP_PMD_TOP - FIXMAP_PMD_NUM; i--)
		level2_fixmap_pgt[i].pmd += load_delta;

	/*
	 * Set up the identity mapping for the switchover. These
	 * entries should *NOT* have the global bit set! This also
	 * creates a bunch of nonsense entries but that is fine --
	 * it avoids problems around wraparound.
	 */

	pud = &early_pgts[0]->pmd;
	pmd = &early_pgts[1]->pmd;
	next_early_pgt = 2;

	pgtable_flags = _KERNPG_TABLE_NOENC + sme_get_me_mask();

	if (la57) {
		/* Extra P4D level is needed; take a scratch page for it. */
		p4d = &early_pgts[next_early_pgt++]->pmd;

		/*
		 * Two consecutive entries at each level so a kernel image
		 * straddling an entry boundary is still fully covered.
		 */
		i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
		pgd[i + 0] = (pgdval_t)p4d + pgtable_flags;
		pgd[i + 1] = (pgdval_t)p4d + pgtable_flags;

		i = physaddr >> P4D_SHIFT;
		p4d[(i + 0) % PTRS_PER_P4D] = (pgdval_t)pud + pgtable_flags;
		p4d[(i + 1) % PTRS_PER_P4D] = (pgdval_t)pud + pgtable_flags;
	} else {
		i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
		pgd[i + 0] = (pgdval_t)pud + pgtable_flags;
		pgd[i + 1] = (pgdval_t)pud + pgtable_flags;
	}

	i = physaddr >> PUD_SHIFT;
	pud[(i + 0) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags;
	pud[(i + 1) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags;

	/* 2M identity-mapping template: large page, executable, non-global. */
	pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL;
	pmd_entry += sme_get_me_mask();
	pmd_entry += physaddr;

	/* Fill one PMD entry per 2M chunk of the kernel image. */
	for (i = 0; i < DIV_ROUND_UP(va_end - va_text, PMD_SIZE); i++) {
		int idx = i + (physaddr >> PMD_SHIFT);

		pmd[idx % PTRS_PER_PMD] = pmd_entry + i * PMD_SIZE;
	}

	/*
	 * Fixup the kernel text+data virtual addresses. Note that
	 * we might write invalid pmds, when the kernel is relocated
	 * cleanup_highmap() fixes this up along with the mappings
	 * beyond _end.
	 *
	 * Only the region occupied by the kernel image has so far
	 * been checked against the table of usable memory regions
	 * provided by the firmware, so invalidate pages outside that
	 * region. A page table entry that maps to a reserved area of
	 * memory would allow processor speculation into that area,
	 * and on some hardware (particularly the UV platform) even
	 * speculative access to some reserved areas is caught as an
	 * error, causing the BIOS to halt the system.
	 */

	pmd = rip_rel_ptr(level2_kernel_pgt);

	/* invalidate pages before the kernel image */
	for (i = 0; i < pmd_index(va_text); i++)
		pmd[i] &= ~_PAGE_PRESENT;

	/* fixup pages that are part of the kernel image */
	for (; i <= pmd_index(va_end); i++)
		if (pmd[i] & _PAGE_PRESENT)
			pmd[i] += load_delta;

	/* invalidate pages after the kernel image */
	for (; i < PTRS_PER_PMD; i++)
		pmd[i] &= ~_PAGE_PRESENT;

	return sme_postprocess_startup(bp, pmd, p2v_offset);
}