xref: /linux/arch/x86/boot/startup/map_kernel.c (revision dafb26f4271b9cc9cad07d9abf3c71c492e14f4c)
// SPDX-License-Identifier: GPL-2.0

#include <linux/init.h>
#include <linux/linkage.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/pgtable.h>

#include <asm/init.h>
#include <asm/sections.h>
#include <asm/setup.h>
#include <asm/sev.h>

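/*
 * Scratch page tables used while constructing the early mappings, and the
 * allocation cursor into them; both are defined elsewhere in the early boot
 * code and are only consumed before the final kernel page tables take over.
 */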
extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD];
extern unsigned int next_early_pgt;

static inline bool check_la57_support(void)
{
	if (!IS_ENABLED(CONFIG_X86_5LEVEL))
		return false;

	/*
	 * 5-level paging is detected and enabled during kernel
	 * decompression, so all that needs to happen here is to check
	 * whether it was enabled there.
	 */
	if (!(native_read_cr4() & X86_CR4_LA57))
		return false;

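	/*
	 * Switch the paging geometry over to 5-level values: with a shift
	 * of 48, each of the 512 top-level entries now maps 256 TiB, the
	 * p4d level becomes a real table with 512 entries, and the direct
	 * map, vmalloc and vmemmap regions move to their LA57 bases.
	 */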
	__pgtable_l5_enabled	= 1;
	pgdir_shift		= 48;
	ptrs_per_p4d		= 512;
	page_offset_base	= __PAGE_OFFSET_BASE_L5;
	vmalloc_base		= __VMALLOC_BASE_L5;
	vmemmap_base		= __VMEMMAP_BASE_L5;

	return true;
}

static unsigned long __head sme_postprocess_startup(struct boot_params *bp,
						    pmdval_t *pmd,
						    unsigned long p2v_offset)
{
	unsigned long paddr, paddr_end;
	int i;

	/* Encrypt the kernel and related areas (if SME is active) */
	sme_encrypt_kernel(bp);

	/*
	 * Clear the memory encryption mask from the .bss..decrypted section.
	 * The bss section will be memset to zero later during initialization,
	 * so there is no need to zero it again after changing the memory
	 * encryption attribute.
	 */
	if (sme_get_me_mask()) {
		paddr = (unsigned long)rip_rel_ptr(__start_bss_decrypted);
		paddr_end = (unsigned long)rip_rel_ptr(__end_bss_decrypted);

		for (; paddr < paddr_end; paddr += PMD_SIZE) {
			/*
			 * On SNP, transition the page to shared in the RMP table so that
			 * it is consistent with the page table attribute change.
			 *
			 * __start_bss_decrypted has a virtual address in the high range
			 * mapping (kernel .text). PVALIDATE, by way of
			 * early_snp_set_memory_shared(), requires a valid virtual
			 * address but the kernel is currently running off of the identity
			 * mapping so use the PA to get a *currently* valid virtual address.
			 */
			early_snp_set_memory_shared(paddr, paddr, PTRS_PER_PMD);

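			/*
			 * The entry carries the encryption mask, so
			 * subtracting the mask clears just the C bit and
			 * leaves all other attribute bits intact.
			 */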
			i = pmd_index(paddr - p2v_offset);
			pmd[i] -= sme_get_me_mask();
		}
	}

	/*
	 * Return the SME encryption mask (if SME is active) to be used as a
	 * modifier for the initial pgdir entry programmed into CR3.
	 */
	return sme_get_me_mask();
}

/*
 * This code is compiled using PIC codegen because it will execute from the
 * early 1:1 mapping of memory, which deviates from the mapping expected by
 * the linker. Due to this deviation, taking the address of a global variable
 * with the plain & operator produces an ambiguous result. Instead,
 * rip_rel_ptr() must be used; it returns the RIP-relative address of the
 * variable, i.e., its address in the 1:1 mapping of memory. The
 * corresponding kernel virtual address can be derived by subtracting
 * p2v_offset from that RIP-relative address.
 */
unsigned long __head __startup_64(unsigned long p2v_offset,
				  struct boot_params *bp)
{
	pmd_t (*early_pgts)[PTRS_PER_PMD] = rip_rel_ptr(early_dynamic_pgts);
	unsigned long physaddr = (unsigned long)rip_rel_ptr(_text);
	unsigned long va_text, va_end;
	unsigned long pgtable_flags;
	unsigned long load_delta;
	pgdval_t *pgd;
	p4dval_t *p4d;
	pudval_t *pud;
	pmdval_t *pmd, pmd_entry;
	bool la57;
	int i;

	la57 = check_la57_support();

	/* Is the address too large? */
	if (physaddr >> MAX_PHYSMEM_BITS)
		for (;;);

	/*
	 * Compute the delta between the address I am compiled to run at
	 * and the address I am actually running at.
	 */
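	/*
	 * Worked example with hypothetical numbers: if the image is linked
	 * to run at __START_KERNEL_map + 0x1000000 but was loaded at
	 * physical address 0x2000000, p2v_offset equals
	 * 0x2000000 - (__START_KERNEL_map + 0x1000000), so load_delta
	 * evaluates to 0x1000000: the image sits 16 MiB above the physical
	 * address it was compiled for.
	 */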
	phys_base = load_delta = __START_KERNEL_map + p2v_offset;

	/* Is the address not 2M aligned? */
	if (load_delta & ~PMD_MASK)
		for (;;);

	va_text = physaddr - p2v_offset;
	va_end  = (unsigned long)rip_rel_ptr(_end) - p2v_offset;

	/* Include the SME encryption mask in the fixup value */
	load_delta += sme_get_me_mask();
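	/*
	 * With the mask folded in, every entry fixed up with load_delta
	 * below inherits the encryption (C) bit as well.
	 */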

	/* Fixup the physical addresses in the page table */

	pgd = rip_rel_ptr(early_top_pgt);
	pgd[pgd_index(__START_KERNEL_map)] += load_delta;

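	/*
	 * With 5-level paging, level4_kernel_pgt sits between the top-level
	 * table and level3_kernel_pgt, so its final entry needs the same
	 * physical-address fixup and the top-level kernel-space entry must
	 * be rewritten to point at it.
	 */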
	if (IS_ENABLED(CONFIG_X86_5LEVEL) && la57) {
		p4d = (p4dval_t *)rip_rel_ptr(level4_kernel_pgt);
		p4d[MAX_PTRS_PER_P4D - 1] += load_delta;

		pgd[pgd_index(__START_KERNEL_map)] = (pgdval_t)p4d | _PAGE_TABLE;
	}

	level3_kernel_pgt[PTRS_PER_PUD - 2].pud += load_delta;
	level3_kernel_pgt[PTRS_PER_PUD - 1].pud += load_delta;

	for (i = FIXMAP_PMD_TOP; i > FIXMAP_PMD_TOP - FIXMAP_PMD_NUM; i--)
		level2_fixmap_pgt[i].pmd += load_delta;

	/*
	 * Set up the identity mapping for the switchover.  These
	 * entries should *NOT* have the global bit set!  This also
	 * creates a bunch of nonsense entries but that is fine --
	 * it avoids problems around wraparound.
	 */

	pud = &early_pgts[0]->pmd;
	pmd = &early_pgts[1]->pmd;
	next_early_pgt = 2;

	pgtable_flags = _KERNPG_TABLE_NOENC + sme_get_me_mask();

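	/*
	 * Two consecutive entries are installed at each level below
	 * because the kernel image, mapped with 2M pages, may straddle
	 * the range covered by a single entry at that level.
	 */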
	if (la57) {
		p4d = &early_pgts[next_early_pgt++]->pmd;

		i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
		pgd[i + 0] = (pgdval_t)p4d + pgtable_flags;
		pgd[i + 1] = (pgdval_t)p4d + pgtable_flags;

		i = physaddr >> P4D_SHIFT;
		p4d[(i + 0) % PTRS_PER_P4D] = (pgdval_t)pud + pgtable_flags;
		p4d[(i + 1) % PTRS_PER_P4D] = (pgdval_t)pud + pgtable_flags;
	} else {
		i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
		pgd[i + 0] = (pgdval_t)pud + pgtable_flags;
		pgd[i + 1] = (pgdval_t)pud + pgtable_flags;
	}

	i = physaddr >> PUD_SHIFT;
	pud[(i + 0) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags;
	pud[(i + 1) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags;

	pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL;
	/* Filter out unsupported __PAGE_KERNEL_* bits: */
	pmd_entry &= __supported_pte_mask;
	pmd_entry += sme_get_me_mask();
	pmd_entry += physaddr;

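	/*
	 * Install the 2M mappings covering [va_text, va_end) so that the
	 * whole kernel image is reachable through the early 1:1 mapping.
	 */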
	for (i = 0; i < DIV_ROUND_UP(va_end - va_text, PMD_SIZE); i++) {
		int idx = i + (physaddr >> PMD_SHIFT);

		pmd[idx % PTRS_PER_PMD] = pmd_entry + i * PMD_SIZE;
	}

	/*
	 * Fix up the kernel text+data virtual addresses. Note that
	 * we might write invalid pmds when the kernel is relocated;
	 * cleanup_highmap() fixes this up, along with the mappings
	 * beyond _end.
	 *
	 * Only the region occupied by the kernel image has so far
	 * been checked against the table of usable memory regions
	 * provided by the firmware, so invalidate pages outside that
	 * region. A page table entry that maps to a reserved area of
	 * memory would allow processor speculation into that area,
	 * and on some hardware (particularly the UV platform) even
	 * speculative access to some reserved areas is caught as an
	 * error, causing the BIOS to halt the system.
	 */

	pmd = rip_rel_ptr(level2_kernel_pgt);

	/* invalidate pages before the kernel image */
	for (i = 0; i < pmd_index(va_text); i++)
		pmd[i] &= ~_PAGE_PRESENT;

	/* fixup pages that are part of the kernel image */
	for (; i <= pmd_index(va_end); i++)
		if (pmd[i] & _PAGE_PRESENT)
			pmd[i] += load_delta;

	/* invalidate pages after the kernel image */
	for (; i < PTRS_PER_PMD; i++)
		pmd[i] &= ~_PAGE_PRESENT;

	return sme_postprocess_startup(bp, pmd, p2v_offset);
}