// SPDX-License-Identifier: GPL-2.0

#include <linux/init.h>
#include <linux/linkage.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/pgtable.h>

#include <asm/init.h>
#include <asm/sections.h>
#include <asm/setup.h>
#include <asm/sev.h>

extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD];
extern unsigned int next_early_pgt;

static inline bool check_la57_support(void)
{
        if (!IS_ENABLED(CONFIG_X86_5LEVEL))
                return false;

        /*
         * 5-level paging is detected and enabled at kernel decompression
         * stage. Only check if it has been enabled there.
         */
        if (!(native_read_cr4() & X86_CR4_LA57))
                return false;

        RIP_REL_REF(__pgtable_l5_enabled) = 1;
        RIP_REL_REF(pgdir_shift)          = 48;
        RIP_REL_REF(ptrs_per_p4d)         = 512;
        RIP_REL_REF(page_offset_base)     = __PAGE_OFFSET_BASE_L5;
        RIP_REL_REF(vmalloc_base)         = __VMALLOC_BASE_L5;
        RIP_REL_REF(vmemmap_base)         = __VMEMMAP_BASE_L5;

        return true;
}
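/*
 * Note on the values set above: with LA57, virtual addresses are 57 bits
 * wide, the PGD level indexes bits 56:48 (hence pgdir_shift == 48) and
 * the p4d level holds its full 512 entries. Without LA57, the p4d level
 * is folded, pgdir_shift remains 39, and the usable virtual address
 * space shrinks from 64 PiB per half to 128 TiB per half.
 */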
static unsigned long __head sme_postprocess_startup(struct boot_params *bp,
                                                    pmdval_t *pmd,
                                                    unsigned long p2v_offset)
{
        unsigned long paddr, paddr_end;
        int i;

        /* Encrypt the kernel and related (if SME is active) */
        sme_encrypt_kernel(bp);

        /*
         * Clear the memory encryption mask from the .bss..decrypted section.
         * The bss section will be memset to zero later in the initialization,
         * so there is no need to zero it after changing the memory encryption
         * attribute.
         */
        if (sme_get_me_mask()) {
                paddr = (unsigned long)rip_rel_ptr(__start_bss_decrypted);
                paddr_end = (unsigned long)rip_rel_ptr(__end_bss_decrypted);

                for (; paddr < paddr_end; paddr += PMD_SIZE) {
                        /*
                         * On SNP, transition the page to shared in the RMP table so that
                         * it is consistent with the page table attribute change.
                         *
                         * __start_bss_decrypted has a virtual address in the high range
                         * mapping (kernel .text). PVALIDATE, by way of
                         * early_snp_set_memory_shared(), requires a valid virtual
                         * address but the kernel is currently running off of the identity
                         * mapping so use the PA to get a *currently* valid virtual address.
                         */
                        early_snp_set_memory_shared(paddr, paddr, PTRS_PER_PMD);

                        i = pmd_index(paddr - p2v_offset);
                        /* The C bit is known to be set here; subtracting the mask clears it */
                        pmd[i] -= sme_get_me_mask();
                }
        }

        /*
         * Return the SME encryption mask (if SME is active) to be used as a
         * modifier for the initial pgdir entry programmed into CR3.
         */
        return sme_get_me_mask();
}
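/*
 * For reference, a minimal sketch (not the authoritative definition; the
 * real one lives in arch/x86/include/asm/asm.h) of the accessors used
 * throughout this file: rip_rel_ptr() forces the address of a symbol to
 * be taken with a RIP-relative LEA, so it evaluates to the correct
 * runtime address even before relocations have been applied, and
 * RIP_REL_REF() dereferences that pointer:
 *
 *	static __always_inline void *rip_rel_ptr(void *p)
 *	{
 *		asm("leaq %c1(%%rip), %0" : "=r"(p) : "i"(p));
 *		return p;
 *	}
 *	#define RIP_REL_REF(var)	(*(typeof(&(var)))rip_rel_ptr(&(var)))
 */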
/*
 * Code in __startup_64() can be relocated during execution, but the compiler
 * doesn't have to generate PC-relative relocations when accessing globals from
 * that function. Clang actually does not generate them, which leads to
 * boot-time crashes. To work around this problem, every global pointer must
 * be accessed using RIP_REL_REF(). Kernel virtual addresses can be determined
 * by subtracting p2v_offset from the RIP-relative address.
 */
unsigned long __head __startup_64(unsigned long p2v_offset,
                                  struct boot_params *bp)
{
        pmd_t (*early_pgts)[PTRS_PER_PMD] = rip_rel_ptr(early_dynamic_pgts);
        unsigned long physaddr = (unsigned long)rip_rel_ptr(_text);
        unsigned long va_text, va_end;
        unsigned long pgtable_flags;
        unsigned long load_delta;
        pgdval_t *pgd;
        p4dval_t *p4d;
        pudval_t *pud;
        pmdval_t *pmd, pmd_entry;
        bool la57;
        int i;

        la57 = check_la57_support();

        /* Is the address too large? */
        if (physaddr >> MAX_PHYSMEM_BITS)
                for (;;);

        /*
         * Compute the delta between the address I am compiled to run at
         * and the address I am actually running at.
         */
        load_delta = __START_KERNEL_map + p2v_offset;
        RIP_REL_REF(phys_base) = load_delta;
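        /*
         * Worked example (illustrative numbers): a kernel linked to run at
         * __START_KERNEL_map that ends up loaded at physical address
         * 0x4000000 with no virtual relocation has
         * p2v_offset == 0x4000000 - __START_KERNEL_map (mod 2^64), so
         * load_delta == __START_KERNEL_map + p2v_offset == 0x4000000,
         * i.e. phys_base is simply the physical load address. The page
         * table entries below were filled in at link time with physical
         * addresses relative to a load address of 0, so adding load_delta
         * rebases them to the actual load address.
         */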
        /* Is the address not 2M aligned? */
        if (load_delta & ~PMD_MASK)
                for (;;);

        va_text = physaddr - p2v_offset;
        va_end = (unsigned long)rip_rel_ptr(_end) - p2v_offset;

        /* Include the SME encryption mask in the fixup value */
        load_delta += sme_get_me_mask();

        /* Fixup the physical addresses in the page table */

        pgd = rip_rel_ptr(early_top_pgt);
        pgd[pgd_index(__START_KERNEL_map)] += load_delta;

        if (IS_ENABLED(CONFIG_X86_5LEVEL) && la57) {
                p4d = (p4dval_t *)rip_rel_ptr(level4_kernel_pgt);
                p4d[MAX_PTRS_PER_P4D - 1] += load_delta;

                pgd[pgd_index(__START_KERNEL_map)] = (pgdval_t)p4d | _PAGE_TABLE;
        }

        RIP_REL_REF(level3_kernel_pgt)[PTRS_PER_PUD - 2].pud += load_delta;
        RIP_REL_REF(level3_kernel_pgt)[PTRS_PER_PUD - 1].pud += load_delta;

        for (i = FIXMAP_PMD_TOP; i > FIXMAP_PMD_TOP - FIXMAP_PMD_NUM; i--)
                RIP_REL_REF(level2_fixmap_pgt)[i].pmd += load_delta;

        /*
         * Set up the identity mapping for the switchover.  These
         * entries should *NOT* have the global bit set!  This also
         * creates a bunch of nonsense entries but that is fine --
         * it avoids problems around wraparound.
         */

        pud = &early_pgts[0]->pmd;
        pmd = &early_pgts[1]->pmd;
        RIP_REL_REF(next_early_pgt) = 2;

        pgtable_flags = _KERNPG_TABLE_NOENC + sme_get_me_mask();

        if (la57) {
                p4d = &early_pgts[RIP_REL_REF(next_early_pgt)++]->pmd;

                i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
                pgd[i + 0] = (pgdval_t)p4d + pgtable_flags;
                pgd[i + 1] = (pgdval_t)p4d + pgtable_flags;

                i = physaddr >> P4D_SHIFT;
                p4d[(i + 0) % PTRS_PER_P4D] = (pgdval_t)pud + pgtable_flags;
                p4d[(i + 1) % PTRS_PER_P4D] = (pgdval_t)pud + pgtable_flags;
        } else {
                i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
                pgd[i + 0] = (pgdval_t)pud + pgtable_flags;
                pgd[i + 1] = (pgdval_t)pud + pgtable_flags;
        }

        i = physaddr >> PUD_SHIFT;
        pud[(i + 0) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags;
        pud[(i + 1) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags;

        pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL;
        /* Filter out unsupported __PAGE_KERNEL_* bits: */
        pmd_entry &= RIP_REL_REF(__supported_pte_mask);
        pmd_entry += sme_get_me_mask();
        pmd_entry += physaddr;

        for (i = 0; i < DIV_ROUND_UP(va_end - va_text, PMD_SIZE); i++) {
                int idx = i + (physaddr >> PMD_SHIFT);

                pmd[idx % PTRS_PER_PMD] = pmd_entry + i * PMD_SIZE;
        }
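        /*
         * The identity mapping above uses 2 MiB (PMD_SIZE) pages, so an
         * image spanning S bytes consumes DIV_ROUND_UP(S, PMD_SIZE) PMD
         * entries: a 48 MiB kernel, for example, takes 24 of them. Two
         * consecutive entries are installed at each upper level so the
         * mapping stays intact even when the image straddles a 1 GiB
         * (PUD) or higher-level boundary.
         */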
        /*
         * Fixup the kernel text+data virtual addresses. Note that
         * we might write invalid pmds; when the kernel is relocated,
         * cleanup_highmap() fixes this up along with the mappings
         * beyond _end.
         *
         * Only the region occupied by the kernel image has so far
         * been checked against the table of usable memory regions
         * provided by the firmware, so invalidate pages outside that
         * region. A page table entry that maps to a reserved area of
         * memory would allow processor speculation into that area,
         * and on some hardware (particularly the UV platform) even
         * speculative access to some reserved areas is caught as an
         * error, causing the BIOS to halt the system.
         */

        pmd = rip_rel_ptr(level2_kernel_pgt);

        /* invalidate pages before the kernel image */
        for (i = 0; i < pmd_index(va_text); i++)
                pmd[i] &= ~_PAGE_PRESENT;

        /* fixup pages that are part of the kernel image */
        for (; i <= pmd_index(va_end); i++)
                if (pmd[i] & _PAGE_PRESENT)
                        pmd[i] += load_delta;

        /* invalidate pages after the kernel image */
        for (; i < PTRS_PER_PMD; i++)
                pmd[i] &= ~_PAGE_PRESENT;

        return sme_postprocess_startup(bp, pmd, p2v_offset);
}
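/*
 * Note on the caller contract: the value returned by __startup_64() is
 * consumed by the startup assembly (arch/x86/kernel/head_64.S), which
 * adds it to the physical address of early_top_pgt to form the initial
 * CR3 value, so CR3 carries the SME C bit whenever SME is active.
 */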