// SPDX-License-Identifier: GPL-2.0

#include <linux/init.h>
#include <linux/linkage.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/pgtable.h>

#include <asm/init.h>
#include <asm/sections.h>
#include <asm/setup.h>
#include <asm/sev.h>

extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD];
extern unsigned int next_early_pgt;

static inline bool check_la57_support(void)
{
	if (!IS_ENABLED(CONFIG_X86_5LEVEL))
		return false;

	/*
	 * 5-level paging is detected and enabled at kernel decompression
	 * stage. Only check if it has been enabled there.
	 */
	if (!(native_read_cr4() & X86_CR4_LA57))
		return false;

	__pgtable_l5_enabled = 1;
	pgdir_shift = 48;
	ptrs_per_p4d = 512;

	return true;
}

static unsigned long __head sme_postprocess_startup(struct boot_params *bp,
						    pmdval_t *pmd,
						    unsigned long p2v_offset)
{
	unsigned long paddr, paddr_end;
	int i;

	/* Encrypt the kernel and related (if SME is active) */
	sme_encrypt_kernel(bp);

	/*
	 * Clear the memory encryption mask from the .bss..decrypted section.
	 * The bss section will be memset to zero later in the initialization so
	 * there is no need to zero it after changing the memory encryption
	 * attribute.
	 */
	if (sme_get_me_mask()) {
		paddr = (unsigned long)rip_rel_ptr(__start_bss_decrypted);
		paddr_end = (unsigned long)rip_rel_ptr(__end_bss_decrypted);

		for (; paddr < paddr_end; paddr += PMD_SIZE) {
			/*
			 * On SNP, transition the page to shared in the RMP table so that
			 * it is consistent with the page table attribute change.
			 *
			 * __start_bss_decrypted has a virtual address in the high range
			 * mapping (kernel .text). PVALIDATE, by way of
			 * early_snp_set_memory_shared(), requires a valid virtual
			 * address but the kernel is currently running off of the identity
			 * mapping so use the PA to get a *currently* valid virtual address.
			 */
			early_snp_set_memory_shared(paddr, paddr, PTRS_PER_PMD);

			i = pmd_index(paddr - p2v_offset);
			pmd[i] -= sme_get_me_mask();
		}
	}

	/*
	 * Return the SME encryption mask (if SME is active) to be used as a
	 * modifier for the initial pgdir entry programmed into CR3.
	 */
	return sme_get_me_mask();
}

/*
 * This code is compiled using PIC codegen because it will execute from the
 * early 1:1 mapping of memory, which deviates from the mapping expected by the
 * linker. Due to this deviation, taking the address of a global variable will
 * produce an ambiguous result when using the plain & operator. Instead,
 * rip_rel_ptr() must be used, which will return the RIP-relative address in
 * the 1:1 mapping of memory. Kernel virtual addresses can be determined by
 * subtracting p2v_offset from the RIP-relative address.
 */
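/*
 * A purely illustrative example of this arithmetic (the numbers are not taken
 * from any real configuration): if _text is linked at __START_KERNEL_map and
 * the image is loaded at physical address 0x100000000, then rip_rel_ptr(_text)
 * evaluates to 0x100000000, p2v_offset is 0x100000000 - __START_KERNEL_map,
 * and load_delta below works out to __START_KERNEL_map + p2v_offset ==
 * 0x100000000, i.e. how far the image was moved from its link-time physical
 * address of zero.
 */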
unsigned long __head __startup_64(unsigned long p2v_offset,
				  struct boot_params *bp)
{
	pmd_t (*early_pgts)[PTRS_PER_PMD] = rip_rel_ptr(early_dynamic_pgts);
	unsigned long physaddr = (unsigned long)rip_rel_ptr(_text);
	unsigned long va_text, va_end;
	unsigned long pgtable_flags;
	unsigned long load_delta;
	pgdval_t *pgd;
	p4dval_t *p4d;
	pudval_t *pud;
	pmdval_t *pmd, pmd_entry;
	bool la57;
	int i;

	la57 = check_la57_support();

	/* Is the address too large? */
	if (physaddr >> MAX_PHYSMEM_BITS)
		for (;;);

	/*
	 * Compute the delta between the address I am compiled to run at
	 * and the address I am actually running at.
	 */
	phys_base = load_delta = __START_KERNEL_map + p2v_offset;

	/* Is the address not 2M aligned? */
	if (load_delta & ~PMD_MASK)
		for (;;);

	va_text = physaddr - p2v_offset;
	va_end = (unsigned long)rip_rel_ptr(_end) - p2v_offset;

	/* Include the SME encryption mask in the fixup value */
	load_delta += sme_get_me_mask();

	/* Fixup the physical addresses in the page table */

	pgd = rip_rel_ptr(early_top_pgt);
	pgd[pgd_index(__START_KERNEL_map)] += load_delta;

	if (IS_ENABLED(CONFIG_X86_5LEVEL) && la57) {
		p4d = (p4dval_t *)rip_rel_ptr(level4_kernel_pgt);
		p4d[MAX_PTRS_PER_P4D - 1] += load_delta;

		pgd[pgd_index(__START_KERNEL_map)] = (pgdval_t)p4d | _PAGE_TABLE;
	}

	level3_kernel_pgt[PTRS_PER_PUD - 2].pud += load_delta;
	level3_kernel_pgt[PTRS_PER_PUD - 1].pud += load_delta;

	for (i = FIXMAP_PMD_TOP; i > FIXMAP_PMD_TOP - FIXMAP_PMD_NUM; i--)
		level2_fixmap_pgt[i].pmd += load_delta;

	/*
	 * Set up the identity mapping for the switchover. These
	 * entries should *NOT* have the global bit set! This also
	 * creates a bunch of nonsense entries but that is fine --
	 * it avoids problems around wraparound.
	 */

	pud = &early_pgts[0]->pmd;
	pmd = &early_pgts[1]->pmd;
	next_early_pgt = 2;

	pgtable_flags = _KERNPG_TABLE_NOENC + sme_get_me_mask();

	if (la57) {
		p4d = &early_pgts[next_early_pgt++]->pmd;

		i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
		pgd[i + 0] = (pgdval_t)p4d + pgtable_flags;
		pgd[i + 1] = (pgdval_t)p4d + pgtable_flags;

		i = physaddr >> P4D_SHIFT;
		p4d[(i + 0) % PTRS_PER_P4D] = (pgdval_t)pud + pgtable_flags;
		p4d[(i + 1) % PTRS_PER_P4D] = (pgdval_t)pud + pgtable_flags;
	} else {
		i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
		pgd[i + 0] = (pgdval_t)pud + pgtable_flags;
		pgd[i + 1] = (pgdval_t)pud + pgtable_flags;
	}

	i = physaddr >> PUD_SHIFT;
	pud[(i + 0) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags;
	pud[(i + 1) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags;

	pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL;
	pmd_entry += sme_get_me_mask();
	pmd_entry += physaddr;
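
	/*
	 * The loop below installs one 2 MiB mapping per PMD_SIZE chunk of the
	 * image. As a purely illustrative example (numbers are hypothetical):
	 * with physaddr == 0x100200000 and a 20 MiB image (_end - _text),
	 * DIV_ROUND_UP(va_end - va_text, PMD_SIZE) is 10, so ten entries are
	 * written starting at index
	 * (0x100200000 >> PMD_SHIFT) % PTRS_PER_PMD == 2049 % 512 == 1.
	 */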
	for (i = 0; i < DIV_ROUND_UP(va_end - va_text, PMD_SIZE); i++) {
		int idx = i + (physaddr >> PMD_SHIFT);

		pmd[idx % PTRS_PER_PMD] = pmd_entry + i * PMD_SIZE;
	}

	/*
	 * Fixup the kernel text+data virtual addresses. Note that
	 * we might write invalid pmds, when the kernel is relocated
	 * cleanup_highmap() fixes this up along with the mappings
	 * beyond _end.
	 *
	 * Only the region occupied by the kernel image has so far
	 * been checked against the table of usable memory regions
	 * provided by the firmware, so invalidate pages outside that
	 * region. A page table entry that maps to a reserved area of
	 * memory would allow processor speculation into that area,
	 * and on some hardware (particularly the UV platform) even
	 * speculative access to some reserved areas is caught as an
	 * error, causing the BIOS to halt the system.
	 */

	pmd = rip_rel_ptr(level2_kernel_pgt);

	/* invalidate pages before the kernel image */
	for (i = 0; i < pmd_index(va_text); i++)
		pmd[i] &= ~_PAGE_PRESENT;

	/* fixup pages that are part of the kernel image */
	for (; i <= pmd_index(va_end); i++)
		if (pmd[i] & _PAGE_PRESENT)
			pmd[i] += load_delta;

	/* invalidate pages after the kernel image */
	for (; i < PTRS_PER_PMD; i++)
		pmd[i] &= ~_PAGE_PRESENT;

	return sme_postprocess_startup(bp, pmd, p2v_offset);
}