xref: /linux/arch/x86/boot/startup/map_kernel.c (revision 7212b58d6d7133e4cd3c2295e1fb54febe284156)
1dbe0ad77SArd Biesheuvel // SPDX-License-Identifier: GPL-2.0
2dbe0ad77SArd Biesheuvel 
3dbe0ad77SArd Biesheuvel #include <linux/init.h>
4dbe0ad77SArd Biesheuvel #include <linux/linkage.h>
5dbe0ad77SArd Biesheuvel #include <linux/types.h>
6dbe0ad77SArd Biesheuvel #include <linux/kernel.h>
7dbe0ad77SArd Biesheuvel #include <linux/pgtable.h>
8dbe0ad77SArd Biesheuvel 
9dbe0ad77SArd Biesheuvel #include <asm/init.h>
10dbe0ad77SArd Biesheuvel #include <asm/sections.h>
11dbe0ad77SArd Biesheuvel #include <asm/setup.h>
12dbe0ad77SArd Biesheuvel #include <asm/sev.h>
13dbe0ad77SArd Biesheuvel 
14dbe0ad77SArd Biesheuvel extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD];
15dbe0ad77SArd Biesheuvel extern unsigned int next_early_pgt;
16dbe0ad77SArd Biesheuvel 
17dbe0ad77SArd Biesheuvel static inline bool check_la57_support(void)
18dbe0ad77SArd Biesheuvel {
19dbe0ad77SArd Biesheuvel 	/*
20dbe0ad77SArd Biesheuvel 	 * 5-level paging is detected and enabled at kernel decompression
21dbe0ad77SArd Biesheuvel 	 * stage. Only check if it has been enabled there.
22dbe0ad77SArd Biesheuvel 	 */
23dbe0ad77SArd Biesheuvel 	if (!(native_read_cr4() & X86_CR4_LA57))
24dbe0ad77SArd Biesheuvel 		return false;
25dbe0ad77SArd Biesheuvel 
26dafb26f4SArd Biesheuvel 	__pgtable_l5_enabled	= 1;
27dafb26f4SArd Biesheuvel 	pgdir_shift		= 48;
28dafb26f4SArd Biesheuvel 	ptrs_per_p4d		= 512;
29dbe0ad77SArd Biesheuvel 
30dbe0ad77SArd Biesheuvel 	return true;
31dbe0ad77SArd Biesheuvel }
32dbe0ad77SArd Biesheuvel 
/*
 * Finalize the SME/SNP state of the kernel image: encrypt the kernel in
 * place when SME is active, then restore the .bss..decrypted region to its
 * shared (unencrypted) state, both in the RMP table (on SNP) and in the
 * early page tables.
 *
 * @bp:		boot_params handed over by the boot loader
 * @pmd:	PMD page table covering the kernel image in the high virtual
 *		mapping; indexed here by kernel VA (paddr - p2v_offset)
 * @p2v_offset:	offset subtracted from a physical (1:1) address to obtain
 *		the corresponding kernel virtual address
 *
 * Returns the SME encryption mask (0 when SME is inactive), to be used as a
 * modifier for the initial pgdir entry programmed into CR3.
 */
static unsigned long __head sme_postprocess_startup(struct boot_params *bp,
						    pmdval_t *pmd,
						    unsigned long p2v_offset)
{
	unsigned long paddr, paddr_end;
	int i;

	/* Encrypt the kernel and related (if SME is active) */
	sme_encrypt_kernel(bp);

	/*
	 * Clear the memory encryption mask from the .bss..decrypted section.
	 * The bss section will be memset to zero later in the initialization so
	 * there is no need to zero it after changing the memory encryption
	 * attribute.
	 */
	if (sme_get_me_mask()) {
		paddr = (unsigned long)rip_rel_ptr(__start_bss_decrypted);
		paddr_end = (unsigned long)rip_rel_ptr(__end_bss_decrypted);

		/* Walk the region one 2M PMD mapping at a time */
		for (; paddr < paddr_end; paddr += PMD_SIZE) {
			/*
			 * On SNP, transition the page to shared in the RMP table so that
			 * it is consistent with the page table attribute change.
			 *
			 * __start_bss_decrypted has a virtual address in the high range
			 * mapping (kernel .text). PVALIDATE, by way of
			 * early_snp_set_memory_shared(), requires a valid virtual
			 * address but the kernel is currently running off of the identity
			 * mapping so use the PA to get a *currently* valid virtual address.
			 */
			early_snp_set_memory_shared(paddr, paddr, PTRS_PER_PMD);

			/* Strip the encryption mask from the PMD mapping this 2M range */
			i = pmd_index(paddr - p2v_offset);
			pmd[i] -= sme_get_me_mask();
		}
	}

	/*
	 * Return the SME encryption mask (if SME is active) to be used as a
	 * modifier for the initial pgdir entry programmed into CR3.
	 */
	return sme_get_me_mask();
}
77dbe0ad77SArd Biesheuvel 
78dafb26f4SArd Biesheuvel /*
79dafb26f4SArd Biesheuvel  * This code is compiled using PIC codegen because it will execute from the
80dafb26f4SArd Biesheuvel  * early 1:1 mapping of memory, which deviates from the mapping expected by the
81dafb26f4SArd Biesheuvel  * linker. Due to this deviation, taking the address of a global variable will
82dafb26f4SArd Biesheuvel  * produce an ambiguous result when using the plain & operator.  Instead,
83dafb26f4SArd Biesheuvel  * rip_rel_ptr() must be used, which will return the RIP-relative address in
84dafb26f4SArd Biesheuvel  * the 1:1 mapping of memory. Kernel virtual addresses can be determined by
85dafb26f4SArd Biesheuvel  * subtracting p2v_offset from the RIP-relative address.
86dbe0ad77SArd Biesheuvel  */
/*
 * Fix up the statically constructed early page tables for the address the
 * kernel was actually loaded at: apply the physical load offset (plus the
 * SME mask) to the precomputed entries, build a transient 1:1 mapping of
 * the kernel image for the switchover, and trim/relocate the PMD entries
 * of the high kernel mapping.
 *
 * @p2v_offset:	offset subtracted from a physical (1:1) address to obtain
 *		the corresponding kernel virtual address
 * @bp:		boot_params handed over by the boot loader
 *
 * Returns the SME encryption mask, to be used as a modifier for the
 * initial pgdir entry programmed into CR3 (see sme_postprocess_startup()).
 */
unsigned long __head __startup_64(unsigned long p2v_offset,
				  struct boot_params *bp)
{
	pmd_t (*early_pgts)[PTRS_PER_PMD] = rip_rel_ptr(early_dynamic_pgts);
	unsigned long physaddr = (unsigned long)rip_rel_ptr(_text);
	unsigned long va_text, va_end;
	unsigned long pgtable_flags;
	unsigned long load_delta;
	pgdval_t *pgd;
	p4dval_t *p4d;
	pudval_t *pud;
	pmdval_t *pmd, pmd_entry;
	bool la57;
	int i;

	la57 = check_la57_support();

	/* Is the address too large? */
	if (physaddr >> MAX_PHYSMEM_BITS)
		for (;;);

	/*
	 * Compute the delta between the address I am compiled to run at
	 * and the address I am actually running at.
	 */
	phys_base = load_delta = __START_KERNEL_map + p2v_offset;

	/* Is the address not 2M aligned? */
	if (load_delta & ~PMD_MASK)
		for (;;);

	/* Kernel virtual addresses of the start and end of the image */
	va_text = physaddr - p2v_offset;
	va_end  = (unsigned long)rip_rel_ptr(_end) - p2v_offset;

	/* Include the SME encryption mask in the fixup value */
	load_delta += sme_get_me_mask();

	/* Fixup the physical addresses in the page table */

	pgd = rip_rel_ptr(early_top_pgt);
	pgd[pgd_index(__START_KERNEL_map)] += load_delta;

	if (la57) {
		/*
		 * With 5-level paging, relocate the precomputed p4d table as
		 * well, and point the top level entry at its 1:1 address.
		 */
		p4d = (p4dval_t *)rip_rel_ptr(level4_kernel_pgt);
		p4d[MAX_PTRS_PER_P4D - 1] += load_delta;

		pgd[pgd_index(__START_KERNEL_map)] = (pgdval_t)p4d | _PAGE_TABLE;
	}

	/*
	 * These globals are accessed directly: PIC codegen turns the
	 * accesses into RIP-relative ones, i.e., valid 1:1 addresses
	 * (see the comment preceding this function).
	 */
	level3_kernel_pgt[PTRS_PER_PUD - 2].pud += load_delta;
	level3_kernel_pgt[PTRS_PER_PUD - 1].pud += load_delta;

	for (i = FIXMAP_PMD_TOP; i > FIXMAP_PMD_TOP - FIXMAP_PMD_NUM; i--)
		level2_fixmap_pgt[i].pmd += load_delta;

	/*
	 * Set up the identity mapping for the switchover.  These
	 * entries should *NOT* have the global bit set!  This also
	 * creates a bunch of nonsense entries but that is fine --
	 * it avoids problems around wraparound.
	 */

	pud = &early_pgts[0]->pmd;
	pmd = &early_pgts[1]->pmd;
	next_early_pgt = 2;

	pgtable_flags = _KERNPG_TABLE_NOENC + sme_get_me_mask();

	/*
	 * Populate two consecutive entries at each level above the PMD so
	 * the mapping remains valid even when the kernel image straddles
	 * an entry boundary at that level.
	 */
	if (la57) {
		p4d = &early_pgts[next_early_pgt++]->pmd;

		i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
		pgd[i + 0] = (pgdval_t)p4d + pgtable_flags;
		pgd[i + 1] = (pgdval_t)p4d + pgtable_flags;

		i = physaddr >> P4D_SHIFT;
		p4d[(i + 0) % PTRS_PER_P4D] = (pgdval_t)pud + pgtable_flags;
		p4d[(i + 1) % PTRS_PER_P4D] = (pgdval_t)pud + pgtable_flags;
	} else {
		i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
		pgd[i + 0] = (pgdval_t)pud + pgtable_flags;
		pgd[i + 1] = (pgdval_t)pud + pgtable_flags;
	}

	i = physaddr >> PUD_SHIFT;
	pud[(i + 0) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags;
	pud[(i + 1) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags;

	/* Map the image with 2M pages, non-global, encrypted if SME is on */
	pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL;
	pmd_entry += sme_get_me_mask();
	pmd_entry +=  physaddr;

	for (i = 0; i < DIV_ROUND_UP(va_end - va_text, PMD_SIZE); i++) {
		int idx = i + (physaddr >> PMD_SHIFT);

		pmd[idx % PTRS_PER_PMD] = pmd_entry + i * PMD_SIZE;
	}

	/*
	 * Fixup the kernel text+data virtual addresses. Note that
	 * we might write invalid pmds, when the kernel is relocated
	 * cleanup_highmap() fixes this up along with the mappings
	 * beyond _end.
	 *
	 * Only the region occupied by the kernel image has so far
	 * been checked against the table of usable memory regions
	 * provided by the firmware, so invalidate pages outside that
	 * region. A page table entry that maps to a reserved area of
	 * memory would allow processor speculation into that area,
	 * and on some hardware (particularly the UV platform) even
	 * speculative access to some reserved areas is caught as an
	 * error, causing the BIOS to halt the system.
	 */

	pmd = rip_rel_ptr(level2_kernel_pgt);

	/* invalidate pages before the kernel image */
	for (i = 0; i < pmd_index(va_text); i++)
		pmd[i] &= ~_PAGE_PRESENT;

	/* fixup pages that are part of the kernel image */
	for (; i <= pmd_index(va_end); i++)
		if (pmd[i] & _PAGE_PRESENT)
			pmd[i] += load_delta;

	/* invalidate pages after the kernel image */
	for (; i < PTRS_PER_PMD; i++)
		pmd[i] &= ~_PAGE_PRESENT;

	return sme_postprocess_startup(bp, pmd, p2v_offset);
}
218