// SPDX-License-Identifier: GPL-2.0-only
// Copyright 2023 Google LLC
// Author: Ard Biesheuvel <ardb@google.com>

#include <linux/init.h>
#include <linux/libfdt.h>
#include <linux/linkage.h>
#include <linux/types.h>
#include <linux/sizes.h>
#include <linux/string.h>

#include <asm/memory.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>

#include "pi.h"

extern const u8 __eh_frame_start[], __eh_frame_end[];

extern void idmap_cpu_replace_ttbr1(phys_addr_t pgdir);

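/*
 * Map [start, end) of the kernel image at its run-time virtual offset into
 * the page tables rooted at @pg_dir, taking new table pages from the bump
 * allocator cursor tracked by @pgd. Masking with ~PAGE_OFFSET reduces the
 * kernel VA to its offset within the TTBR1 address range, which is what
 * map_range() uses to index the tables from @root_level down.
 */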
static void __init map_segment(pgd_t *pg_dir, phys_addr_t *pgd, u64 va_offset,
			       void *start, void *end, pgprot_t prot,
			       bool may_use_cont, int root_level)
{
	map_range(pgd, ((u64)start + va_offset) & ~PAGE_OFFSET,
		  ((u64)end + va_offset) & ~PAGE_OFFSET, (u64)start,
		  prot, root_level, (pte_t *)pg_dir, may_use_cont, 0);
}

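/*
 * Unmapping is done by re-mapping the range with a protection value of 0,
 * which writes descriptors with the valid bit clear. No table allocator is
 * passed, so this relies on the required table hierarchy existing already.
 */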
static void __init unmap_segment(pgd_t *pg_dir, u64 va_offset, void *start,
				 void *end, int root_level)
{
	map_segment(pg_dir, NULL, va_offset, start, end, __pgprot(0),
		    false, root_level);
}

static void __init map_kernel(u64 kaslr_offset, u64 va_offset, int root_level)
{
	bool enable_scs = IS_ENABLED(CONFIG_UNWIND_PATCH_PAC_INTO_SCS);
	bool twopass = IS_ENABLED(CONFIG_RELOCATABLE);
	phys_addr_t pgdp = (phys_addr_t)init_pg_dir + PAGE_SIZE;
	pgprot_t text_prot = PAGE_KERNEL_ROX;
	pgprot_t data_prot = PAGE_KERNEL;
	pgprot_t prot;

	/*
	 * External debuggers may need to write directly to the text mapping to
	 * install SW breakpoints. Allow this (only) when explicitly requested
	 * with rodata=off.
	 */
	if (arm64_test_sw_feature_override(ARM64_SW_FEATURE_OVERRIDE_RODATA_OFF))
		text_prot = PAGE_KERNEL_EXEC;

	/*
	 * We only enable the shadow call stack dynamically if we are running
	 * on a system that does not implement PAC or BTI. PAC and SCS provide
	 * roughly the same level of protection, and BTI relies on the PACIASP
	 * instructions serving as landing pads, preventing us from patching
	 * those instructions into something else.
	 */
	if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL) && cpu_has_pac())
		enable_scs = false;

	if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) && cpu_has_bti()) {
		enable_scs = false;

		/*
		 * If we have a CPU that supports BTI and a kernel built for
		 * BTI then mark the kernel executable text as guarded pages
		 * now so we don't have to rewrite the page tables later.
		 */
		text_prot = __pgprot_modify(text_prot, PTE_GP, PTE_GP);
	}

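	/*
	 * Both relocation processing (CONFIG_RELOCATABLE) and shadow call
	 * stack patching have to write into regions that will ultimately be
	 * mapped read-only, so in either case those regions are mapped
	 * writable on the first pass and only receive their final
	 * permissions on the second pass below.
	 */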
	/* Map all code read-write on the first pass if needed */
	twopass |= enable_scs;
	prot = twopass ? data_prot : text_prot;

	/*
	 * [_text, _stext) isn't executed after boot and contains some
	 * non-executable, unpredictable data, so map it non-executable.
	 */
	map_segment(init_pg_dir, &pgdp, va_offset, _text, _stext, data_prot,
		    false, root_level);
	map_segment(init_pg_dir, &pgdp, va_offset, _stext, _etext, prot,
		    !twopass, root_level);
	map_segment(init_pg_dir, &pgdp, va_offset, __start_rodata,
		    __inittext_begin, data_prot, false, root_level);
	map_segment(init_pg_dir, &pgdp, va_offset, __inittext_begin,
		    __inittext_end, prot, false, root_level);
	map_segment(init_pg_dir, &pgdp, va_offset, __initdata_begin,
		    __initdata_end, data_prot, false, root_level);
	map_segment(init_pg_dir, &pgdp, va_offset, _data, _end, data_prot,
		    true, root_level);
	dsb(ishst);

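	/* Make the early kernel mappings live by installing init_pg_dir in TTBR1 */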
	idmap_cpu_replace_ttbr1((phys_addr_t)init_pg_dir);

	if (twopass) {
		if (IS_ENABLED(CONFIG_RELOCATABLE))
			relocate_kernel(kaslr_offset);

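		/*
		 * Dynamic SCS patching rewrites the PACIASP/AUTIASP
		 * instructions located via the .eh_frame unwind metadata into
		 * shadow call stack pushes/pops, hence the I-cache
		 * invalidation afterwards.
		 */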
		if (enable_scs) {
			scs_patch(__eh_frame_start + va_offset,
				  __eh_frame_end - __eh_frame_start);
			asm("ic ialluis");

			dynamic_scs_is_enabled = true;
		}

		/*
		 * Unmap the text region before remapping it, to avoid
		 * potential TLB conflicts when creating the contiguous
		 * descriptors.
		 */
		unmap_segment(init_pg_dir, va_offset, _stext, _etext,
			      root_level);
		dsb(ishst);
		isb();
		__tlbi(vmalle1);
		isb();

		/*
		 * Remap these segments with different permissions
		 * No new page table allocations should be needed
		 */
		map_segment(init_pg_dir, NULL, va_offset, _stext, _etext,
			    text_prot, true, root_level);
		map_segment(init_pg_dir, NULL, va_offset, __inittext_begin,
			    __inittext_end, text_prot, false, root_level);
	}

	/* Copy the root page table to its final location */
	memcpy((void *)swapper_pg_dir + va_offset, init_pg_dir, PAGE_SIZE);
	dsb(ishst);
	idmap_cpu_replace_ttbr1((phys_addr_t)swapper_pg_dir);
}

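/*
 * Runs from the ID map (.idmap.text): translation is disabled while TTBR0_EL1
 * and TCR_EL1 are updated, since toggling TCR_EL1.DS changes the descriptor
 * format and doing so with the MMU enabled could result in TLB conflicts.
 * The TLB is invalidated before the MMU is switched back on.
 */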
static void noinline __section(".idmap.text") set_ttbr0_for_lpa2(phys_addr_t ttbr)
{
	u64 sctlr = read_sysreg(sctlr_el1);
	u64 tcr = read_sysreg(tcr_el1) | TCR_DS;
	u64 mmfr0 = read_sysreg(id_aa64mmfr0_el1);
	u64 parange = cpuid_feature_extract_unsigned_field(mmfr0,
					ID_AA64MMFR0_EL1_PARANGE_SHIFT);

	tcr &= ~TCR_IPS_MASK;
	tcr |= parange << TCR_IPS_SHIFT;

	asm("	msr	sctlr_el1, %0	;"
	    "	isb			;"
	    "	msr	ttbr0_el1, %1	;"
	    "	msr	tcr_el1, %2	;"
	    "	isb			;"
	    "	tlbi	vmalle1		;"
	    "	dsb	nsh		;"
	    "	isb			;"
	    "	msr	sctlr_el1, %3	;"
	    "	isb			;"
	    ::	"r"(sctlr & ~SCTLR_ELx_M), "r"(ttbr), "r"(tcr), "r"(sctlr));
}

static void __init remap_idmap_for_lpa2(void)
{
	/* clear the bits that change meaning once LPA2 is turned on */
	ptdesc_t mask = PTE_SHARED;

	/*
	 * We have to clear bits [9:8] in all block or page descriptors in the
	 * initial ID map, as otherwise they will be (mis)interpreted as
	 * physical address bits once we flick the LPA2 switch (TCR.DS). Since
	 * we cannot manipulate live descriptors in that way without creating
	 * potential TLB conflicts, let's create another temporary ID map in an
	 * LPA2-compatible fashion, and update the initial ID map while running
	 * from that.
	 */
	create_init_idmap(init_pg_dir, mask);
	dsb(ishst);
	set_ttbr0_for_lpa2((phys_addr_t)init_pg_dir);
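	/* Now running via the temporary ID map, with TCR_EL1.DS set */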

	/*
	 * Recreate the initial ID map with the same granularity as before.
	 * Don't bother with the FDT, we no longer need it after this.
	 */
	memset(init_idmap_pg_dir, 0,
	       (char *)init_idmap_pg_end - (char *)init_idmap_pg_dir);

	create_init_idmap(init_idmap_pg_dir, mask);
	dsb(ishst);

	/* switch back to the updated initial ID map */
	set_ttbr0_for_lpa2((phys_addr_t)init_idmap_pg_dir);

	/* wipe the temporary ID map from memory */
	memset(init_pg_dir, 0, (char *)init_pg_end - (char *)init_pg_dir);
}

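/*
 * Extend the initial ID map to cover the FDT so it can be parsed before the
 * kernel mappings exist. Page table pages are taken from a statically
 * allocated buffer, as no memory allocator is available at this point.
 */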
static void *__init map_fdt(phys_addr_t fdt)
{
	static u8 ptes[INIT_IDMAP_FDT_SIZE] __initdata __aligned(PAGE_SIZE);
	phys_addr_t efdt = fdt + MAX_FDT_SIZE;
	phys_addr_t ptep = (phys_addr_t)ptes;	/* We're idmapped when called */

	/*
	 * Map up to MAX_FDT_SIZE bytes, but avoid overlap with
	 * the kernel image.
	 */
	map_range(&ptep, fdt, (u64)_text > fdt ? min((u64)_text, efdt) : efdt,
		  fdt, PAGE_KERNEL, IDMAP_ROOT_LEVEL,
		  (pte_t *)init_idmap_pg_dir, false, 0);
	dsb(ishst);

	return (void *)fdt;
}

/*
 * PI version of the Cavium Erratum 27456 detection, which makes it
 * impossible to use non-global mappings.
 */
static bool __init ng_mappings_allowed(void)
{
	static const struct midr_range cavium_erratum_27456_cpus[] __initconst = {
		/* Cavium ThunderX, T88 pass 1.x - 2.1 */
		MIDR_RANGE(MIDR_THUNDERX, 0, 0, 1, 1),
		/* Cavium ThunderX, T81 pass 1.0 */
		MIDR_REV(MIDR_THUNDERX_81XX, 0, 0),
		{},
	};

	for (const struct midr_range *r = cavium_erratum_27456_cpus; r->model; r++) {
		if (midr_is_cpu_model_range(read_cpuid_id(), r->model,
					    r->rv_min, r->rv_max))
			return false;
	}

	return true;
}

asmlinkage void __init early_map_kernel(u64 boot_status, phys_addr_t fdt)
{
	static char const chosen_str[] __initconst = "/chosen";
	u64 va_base, pa_base = (u64)&_text;
	u64 kaslr_offset = pa_base % MIN_KIMG_ALIGN;
	int root_level = 4 - CONFIG_PGTABLE_LEVELS;
	int va_bits = VA_BITS;
	int chosen;
	void *fdt_mapped = map_fdt(fdt);

	/* Clear BSS and the initial page tables */
	memset(__bss_start, 0, (char *)init_pg_end - (char *)__bss_start);

	/* Parse the command line for CPU feature overrides */
	chosen = fdt_path_offset(fdt_mapped, chosen_str);
	init_feature_override(boot_status, fdt_mapped, chosen);

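	/*
	 * If the kernel was configured for a VA size the CPU does not
	 * implement (52-bit VAs need FEAT_LVA with 64k pages, or FEAT_LPA2
	 * with 4k/16k pages), fall back to the minimum VA size. In the LPA2
	 * case, the smaller VA size also needs one fewer level of
	 * translation, hence the root level is incremented.
	 */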
	if (IS_ENABLED(CONFIG_ARM64_64K_PAGES) && !cpu_has_lva()) {
		va_bits = VA_BITS_MIN;
	} else if (IS_ENABLED(CONFIG_ARM64_LPA2) && !cpu_has_lpa2()) {
		va_bits = VA_BITS_MIN;
		root_level++;
	}

	if (va_bits > VA_BITS_MIN)
		sysreg_clear_set(tcr_el1, TCR_T1SZ_MASK, TCR_T1SZ(va_bits));

	/*
	 * The virtual KASLR displacement modulo 2 MiB is decided by the
	 * physical placement of the image, as otherwise, we might not be able
	 * to create the early kernel mapping using 2 MiB block descriptors. So
	 * take the low bits of the KASLR offset from the physical address, and
	 * fill in the high bits from the seed.
	 */
	if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
		u64 kaslr_seed = kaslr_early_init(fdt_mapped, chosen);

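		/*
		 * A non-zero KASLR seed means kpti may be needed later, which
		 * in turn requires non-global kernel mappings; those cannot
		 * be used on cores affected by Cavium erratum 27456 (see
		 * ng_mappings_allowed() above).
		 */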
		if (kaslr_seed && kaslr_requires_kpti())
			arm64_use_ng_mappings = ng_mappings_allowed();

		kaslr_offset |= kaslr_seed & ~(MIN_KIMG_ALIGN - 1);
	}

	if (IS_ENABLED(CONFIG_ARM64_LPA2) && va_bits > VA_BITS_MIN)
		remap_idmap_for_lpa2();

	va_base = KIMAGE_VADDR + kaslr_offset;
	map_kernel(kaslr_offset, va_base - pa_base, root_level);
}