// SPDX-License-Identifier: GPL-2.0

#include <linux/init.h>
#include <linux/linkage.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/pgtable.h>

#include <asm/init.h>
#include <asm/sections.h>
#include <asm/setup.h>
#include <asm/sev.h>

extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD];
extern unsigned int next_early_pgt;

static inline bool check_la57_support(void)
{
	if (!IS_ENABLED(CONFIG_X86_5LEVEL))
		return false;

	/*
	 * 5-level paging is detected and enabled at kernel decompression
	 * stage. Only check if it has been enabled there.
	 */
	if (!(native_read_cr4() & X86_CR4_LA57))
		return false;

	__pgtable_l5_enabled = 1;
	pgdir_shift = 48;
	ptrs_per_p4d = 512;
	page_offset_base = __PAGE_OFFSET_BASE_L5;
	vmalloc_base = __VMALLOC_BASE_L5;
	vmemmap_base = __VMEMMAP_BASE_L5;

	return true;
}
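
/*
 * Illustrative note (not part of the boot flow): with LA57 enabled, a
 * virtual address carries five 9-bit table indices above the 12-bit page
 * offset, so the top-level index is taken at bit 48:
 *
 *	pgd_index(addr) == (addr >> 48) & (PTRS_PER_PGD - 1)
 *
 * With 4-level paging the p4d level is folded away instead, which is why
 * the defaults being overridden above are pgdir_shift == 39 and
 * ptrs_per_p4d == 1.
 */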

static unsigned long __head sme_postprocess_startup(struct boot_params *bp,
						    pmdval_t *pmd,
						    unsigned long p2v_offset)
{
	unsigned long paddr, paddr_end;
	int i;

	/* Encrypt the kernel and related (if SME is active) */
	sme_encrypt_kernel(bp);

	/*
	 * Clear the memory encryption mask from the .bss..decrypted section.
	 * The bss section will be memset to zero later in the initialization so
	 * there is no need to zero it after changing the memory encryption
	 * attribute.
	 */
	if (sme_get_me_mask()) {
		paddr = (unsigned long)rip_rel_ptr(__start_bss_decrypted);
		paddr_end = (unsigned long)rip_rel_ptr(__end_bss_decrypted);

		for (; paddr < paddr_end; paddr += PMD_SIZE) {
			/*
			 * On SNP, transition the page to shared in the RMP table so that
			 * it is consistent with the page table attribute change.
			 *
			 * __start_bss_decrypted has a virtual address in the high range
			 * mapping (kernel .text). PVALIDATE, by way of
			 * early_snp_set_memory_shared(), requires a valid virtual
			 * address but the kernel is currently running off of the identity
			 * mapping so use the PA to get a *currently* valid virtual address.
			 */
			early_snp_set_memory_shared(paddr, paddr, PTRS_PER_PMD);

			i = pmd_index(paddr - p2v_offset);
			pmd[i] -= sme_get_me_mask();
		}
	}

	/*
	 * Return the SME encryption mask (if SME is active) to be used as a
	 * modifier for the initial pgdir entry programmed into CR3.
	 */
	return sme_get_me_mask();
}

/*
 * This code is compiled using PIC codegen because it will execute from the
 * early 1:1 mapping of memory, which deviates from the mapping expected by the
 * linker. Due to this deviation, taking the address of a global variable will
 * produce an ambiguous result when using the plain & operator. Instead,
 * rip_rel_ptr() must be used, which will return the RIP-relative address in
 * the 1:1 mapping of memory. Kernel virtual addresses can be determined by
 * subtracting p2v_offset from the RIP-relative address.
 */
unsigned long __head __startup_64(unsigned long p2v_offset,
				  struct boot_params *bp)
{
	pmd_t (*early_pgts)[PTRS_PER_PMD] = rip_rel_ptr(early_dynamic_pgts);
	unsigned long physaddr = (unsigned long)rip_rel_ptr(_text);
	unsigned long va_text, va_end;
	unsigned long pgtable_flags;
	unsigned long load_delta;
	pgdval_t *pgd;
	p4dval_t *p4d;
	pudval_t *pud;
	pmdval_t *pmd, pmd_entry;
	bool la57;
	int i;

	la57 = check_la57_support();

	/* Is the address too large? */
	if (physaddr >> MAX_PHYSMEM_BITS)
		for (;;);

	/*
	 * Compute the delta between the address I am compiled to run at
	 * and the address I am actually running at.
	 */
	phys_base = load_delta = __START_KERNEL_map + p2v_offset;

	/* Is the address not 2M aligned? */
	if (load_delta & ~PMD_MASK)
		for (;;);

	va_text = physaddr - p2v_offset;
	va_end = (unsigned long)rip_rel_ptr(_end) - p2v_offset;

	/* Include the SME encryption mask in the fixup value */
	load_delta += sme_get_me_mask();
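
	/*
	 * Worked example with made-up numbers: if the bootloader placed _text
	 * at physaddr == 0x8000000 while the image is linked to run at
	 * __START_KERNEL_map + 0x1000000, then
	 *
	 *	p2v_offset == 0x8000000 - (__START_KERNEL_map + 0x1000000)
	 *	phys_base  == __START_KERNEL_map + p2v_offset == 0x7000000
	 *
	 * i.e. phys_base is the physical address that the base of the kernel
	 * mapping, __START_KERNEL_map itself, translates to. When the kernel
	 * runs at its link-time physical address this delta is zero, and
	 * load_delta consists solely of the SME mask folded in just above.
	 */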

	/* Fixup the physical addresses in the page table */

	pgd = rip_rel_ptr(early_top_pgt);
	pgd[pgd_index(__START_KERNEL_map)] += load_delta;

	if (IS_ENABLED(CONFIG_X86_5LEVEL) && la57) {
		p4d = (p4dval_t *)rip_rel_ptr(level4_kernel_pgt);
		p4d[MAX_PTRS_PER_P4D - 1] += load_delta;

		pgd[pgd_index(__START_KERNEL_map)] = (pgdval_t)p4d | _PAGE_TABLE;
	}

	level3_kernel_pgt[PTRS_PER_PUD - 2].pud += load_delta;
	level3_kernel_pgt[PTRS_PER_PUD - 1].pud += load_delta;

	for (i = FIXMAP_PMD_TOP; i > FIXMAP_PMD_TOP - FIXMAP_PMD_NUM; i--)
		level2_fixmap_pgt[i].pmd += load_delta;

	/*
	 * Set up the identity mapping for the switchover.  These
	 * entries should *NOT* have the global bit set!  This also
	 * creates a bunch of nonsense entries but that is fine --
	 * it avoids problems around wraparound.
	 */

	pud = &early_pgts[0]->pmd;
	pmd = &early_pgts[1]->pmd;
	next_early_pgt = 2;

	pgtable_flags = _KERNPG_TABLE_NOENC + sme_get_me_mask();

	if (la57) {
		p4d = &early_pgts[next_early_pgt++]->pmd;

		i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
		pgd[i + 0] = (pgdval_t)p4d + pgtable_flags;
		pgd[i + 1] = (pgdval_t)p4d + pgtable_flags;

		i = physaddr >> P4D_SHIFT;
		p4d[(i + 0) % PTRS_PER_P4D] = (pgdval_t)pud + pgtable_flags;
		p4d[(i + 1) % PTRS_PER_P4D] = (pgdval_t)pud + pgtable_flags;
	} else {
		i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
		pgd[i + 0] = (pgdval_t)pud + pgtable_flags;
		pgd[i + 1] = (pgdval_t)pud + pgtable_flags;
	}

	i = physaddr >> PUD_SHIFT;
	pud[(i + 0) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags;
	pud[(i + 1) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags;
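
	/*
	 * Note on the construction above: at each level of the hierarchy, the
	 * slot covering physaddr and the slot right after it are both
	 * populated, so the 1:1 mapping stays valid even if the kernel image
	 * straddles a P4D- or PUD-sized boundary. The % PTRS_PER_* arithmetic
	 * lets the second slot wrap around to index 0 instead of running off
	 * the end of the table; the wrapped and unused extra slots are the
	 * "nonsense entries" that the comment above tolerates.
	 */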

	pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL;
	/* Filter out unsupported __PAGE_KERNEL_* bits: */
	pmd_entry &= __supported_pte_mask;
	pmd_entry += sme_get_me_mask();
	pmd_entry += physaddr;

	for (i = 0; i < DIV_ROUND_UP(va_end - va_text, PMD_SIZE); i++) {
		int idx = i + (physaddr >> PMD_SHIFT);

		pmd[idx % PTRS_PER_PMD] = pmd_entry + i * PMD_SIZE;
	}
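
	/*
	 * For illustration: the loop above covers [va_text, va_end) with
	 * 2 MiB leaf mappings. A hypothetical 20 MiB image loaded 2 MiB
	 * aligned takes DIV_ROUND_UP(20 MiB, PMD_SIZE) == 10 iterations,
	 * iteration i installing pmd_entry + i * PMD_SIZE at slot
	 * ((physaddr >> PMD_SHIFT) + i) % PTRS_PER_PMD.
	 */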

	/*
	 * Fixup the kernel text+data virtual addresses. Note that we might
	 * write invalid PMDs when the kernel is relocated; cleanup_highmap()
	 * fixes this up, along with the mappings beyond _end.
	 *
	 * Only the region occupied by the kernel image has so far been
	 * checked against the table of usable memory regions provided by the
	 * firmware, so invalidate pages outside that region. A page table
	 * entry that maps to a reserved area of memory would allow processor
	 * speculation into that area, and on some hardware (particularly the
	 * UV platform) even speculative access to some reserved areas is
	 * caught as an error, causing the BIOS to halt the system.
	 */

	pmd = rip_rel_ptr(level2_kernel_pgt);

	/* invalidate pages before the kernel image */
	for (i = 0; i < pmd_index(va_text); i++)
		pmd[i] &= ~_PAGE_PRESENT;

	/* fixup pages that are part of the kernel image */
	for (; i <= pmd_index(va_end); i++)
		if (pmd[i] & _PAGE_PRESENT)
			pmd[i] += load_delta;

	/* invalidate pages after the kernel image */
	for (; i < PTRS_PER_PMD; i++)
		pmd[i] &= ~_PAGE_PRESENT;

	return sme_postprocess_startup(bp, pmd, p2v_offset);
}
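
/*
 * At the time of writing, the return value is consumed by the startup
 * assembly in head_64.S: the mask is added into the physical address of the
 * top-level page table before that address is programmed into CR3, so that
 * the initial page table walk uses encrypted pages when SME is active.
 */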