// SPDX-License-Identifier: GPL-2.0-only
// Copyright 2023 Google LLC
// Author: Ard Biesheuvel <ardb@google.com>

#include <linux/init.h>
#include <linux/libfdt.h>
#include <linux/linkage.h>
#include <linux/types.h>
#include <linux/sizes.h>
#include <linux/string.h>

#include <asm/memory.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>

#include "pi.h"

extern const u8 __eh_frame_start[], __eh_frame_end[];

extern void idmap_cpu_replace_ttbr1(phys_addr_t pgdir);

static void __init map_segment(pgd_t *pg_dir, phys_addr_t *pgd, u64 va_offset,
			       void *start, void *end, pgprot_t prot,
			       bool may_use_cont, int root_level)
{
	map_range(pgd, ((u64)start + va_offset) & ~PAGE_OFFSET,
		  ((u64)end + va_offset) & ~PAGE_OFFSET, (u64)start,
		  prot, root_level, (pte_t *)pg_dir, may_use_cont, 0);
}

static void __init unmap_segment(pgd_t *pg_dir, u64 va_offset, void *start,
				 void *end, int root_level)
{
	map_segment(pg_dir, NULL, va_offset, start, end, __pgprot(0),
		    false, root_level);
}

static void __init map_kernel(u64 kaslr_offset, u64 va_offset, int root_level)
{
	bool enable_scs = IS_ENABLED(CONFIG_UNWIND_PATCH_PAC_INTO_SCS);
	bool twopass = IS_ENABLED(CONFIG_RELOCATABLE);
	phys_addr_t pgdp = (phys_addr_t)init_pg_dir + PAGE_SIZE;
	pgprot_t text_prot = PAGE_KERNEL_ROX;
	pgprot_t data_prot = PAGE_KERNEL;
	pgprot_t prot;

	/*
	 * External debuggers may need to write directly to the text mapping to
	 * install SW breakpoints. Allow this (only) when explicitly requested
	 * with rodata=off.
	 */
	if (arm64_test_sw_feature_override(ARM64_SW_FEATURE_OVERRIDE_RODATA_OFF))
		text_prot = PAGE_KERNEL_EXEC;

	/*
	 * We only enable the shadow call stack dynamically if we are running
	 * on a system that does not implement PAC or BTI. PAC and SCS provide
	 * roughly the same level of protection, and BTI relies on the PACIASP
	 * instructions serving as landing pads, preventing us from patching
	 * those instructions into something else.
	 */
	if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL) && cpu_has_pac())
		enable_scs = false;

	if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) && cpu_has_bti()) {
		enable_scs = false;

		/*
		 * If we have a CPU that supports BTI and a kernel built for
		 * BTI then mark the kernel executable text as guarded pages
		 * now so we don't have to rewrite the page tables later.
		 */
		text_prot = __pgprot_modify(text_prot, PTE_GP, PTE_GP);
	}

	/* Map all code read-write on the first pass if needed */
	twopass |= enable_scs;
	prot = twopass ? data_prot : text_prot;

	/*
	 * [_text, _stext) isn't executed after boot and contains some
	 * non-executable, unpredictable data, so map it non-executable.
	 */
	map_segment(init_pg_dir, &pgdp, va_offset, _text, _stext, data_prot,
		    false, root_level);
	map_segment(init_pg_dir, &pgdp, va_offset, _stext, _etext, prot,
		    !twopass, root_level);
	map_segment(init_pg_dir, &pgdp, va_offset, __start_rodata,
		    __inittext_begin, data_prot, false, root_level);
	map_segment(init_pg_dir, &pgdp, va_offset, __inittext_begin,
		    __inittext_end, prot, false, root_level);
	map_segment(init_pg_dir, &pgdp, va_offset, __initdata_begin,
		    __initdata_end, data_prot, false, root_level);
	map_segment(init_pg_dir, &pgdp, va_offset, _data, _end, data_prot,
		    true, root_level);
	dsb(ishst);

	idmap_cpu_replace_ttbr1((phys_addr_t)init_pg_dir);

	if (twopass) {
		if (IS_ENABLED(CONFIG_RELOCATABLE))
			relocate_kernel(kaslr_offset);

		if (enable_scs) {
			scs_patch(__eh_frame_start + va_offset,
				  __eh_frame_end - __eh_frame_start);
			asm("ic ialluis");

			dynamic_scs_is_enabled = true;
		}

		/*
		 * Unmap the text region before remapping it, to avoid
		 * potential TLB conflicts when creating the contiguous
		 * descriptors.
		 */
		unmap_segment(init_pg_dir, va_offset, _stext, _etext,
			      root_level);
		dsb(ishst);
		isb();
		__tlbi(vmalle1);
		isb();

		/*
		 * Remap these segments with different permissions; no new
		 * page table allocations should be needed.
		 */
		map_segment(init_pg_dir, NULL, va_offset, _stext, _etext,
			    text_prot, true, root_level);
		map_segment(init_pg_dir, NULL, va_offset, __inittext_begin,
			    __inittext_end, text_prot, false, root_level);
	}

	/* Copy the root page table to its final location */
	memcpy((void *)swapper_pg_dir + va_offset, init_pg_dir, PAGE_SIZE);
	dsb(ishst);
	idmap_cpu_replace_ttbr1((phys_addr_t)swapper_pg_dir);
}
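
/*
 * Note on the helper below: set_ttbr0_for_lpa2() lives in .idmap.text so it
 * keeps executing from an identity mapping while it briefly disables the MMU
 * (clears SCTLR_ELx_M), reprograms TTBR0 and TCR (including TCR.DS for LPA2),
 * invalidates the TLB, and then re-enables the MMU.
 */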

static void noinline __section(".idmap.text") set_ttbr0_for_lpa2(phys_addr_t ttbr)
{
	u64 sctlr = read_sysreg(sctlr_el1);
	u64 tcr = read_sysreg(tcr_el1) | TCR_DS;
	u64 mmfr0 = read_sysreg(id_aa64mmfr0_el1);
	u64 parange = cpuid_feature_extract_unsigned_field(mmfr0,
					ID_AA64MMFR0_EL1_PARANGE_SHIFT);

	tcr &= ~TCR_IPS_MASK;
	tcr |= parange << TCR_IPS_SHIFT;

	asm("	msr	sctlr_el1, %0	;"
	    "	isb			;"
	    "	msr	ttbr0_el1, %1	;"
	    "	msr	tcr_el1, %2	;"
	    "	isb			;"
	    "	tlbi	vmalle1		;"
	    "	dsb	nsh		;"
	    "	isb			;"
	    "	msr	sctlr_el1, %3	;"
	    "	isb			;"
	    ::	"r"(sctlr & ~SCTLR_ELx_M), "r"(ttbr), "r"(tcr), "r"(sctlr));
}

static void __init remap_idmap_for_lpa2(void)
{
	/* clear the bits that change meaning once LPA2 is turned on */
	ptdesc_t mask = PTE_SHARED;

	/*
	 * We have to clear bits [9:8] in all block or page descriptors in the
	 * initial ID map, as otherwise they will be (mis)interpreted as
	 * physical address bits once we flick the LPA2 switch (TCR.DS). Since
	 * we cannot manipulate live descriptors in that way without creating
	 * potential TLB conflicts, let's create another temporary ID map in a
	 * LPA2 compatible fashion, and update the initial ID map while running
	 * from that.
	 */
	create_init_idmap(init_pg_dir, mask);
	dsb(ishst);
	set_ttbr0_for_lpa2((phys_addr_t)init_pg_dir);

	/*
	 * Recreate the initial ID map with the same granularity as before.
	 * Don't bother with the FDT, we no longer need it after this.
	 */
	memset(init_idmap_pg_dir, 0,
	       (char *)init_idmap_pg_end - (char *)init_idmap_pg_dir);

	create_init_idmap(init_idmap_pg_dir, mask);
	dsb(ishst);

	/* switch back to the updated initial ID map */
	set_ttbr0_for_lpa2((phys_addr_t)init_idmap_pg_dir);

	/* wipe the temporary ID map from memory */
	memset(init_pg_dir, 0, (char *)init_pg_end - (char *)init_pg_dir);
}

static void *__init map_fdt(phys_addr_t fdt)
{
	static u8 ptes[INIT_IDMAP_FDT_SIZE] __initdata __aligned(PAGE_SIZE);
	phys_addr_t efdt = fdt + MAX_FDT_SIZE;
	phys_addr_t ptep = (phys_addr_t)ptes;	/* We're idmapped when called */

	/*
	 * Map up to MAX_FDT_SIZE bytes, but avoid overlap with
	 * the kernel image.
	 */
	map_range(&ptep, fdt, (u64)_text > fdt ? min((u64)_text, efdt) : efdt,
		  fdt, PAGE_KERNEL, IDMAP_ROOT_LEVEL,
		  (pte_t *)init_idmap_pg_dir, false, 0);
	dsb(ishst);

	return (void *)fdt;
}

/*
 * PI version of the Cavium Erratum 27456 detection, which makes it
 * impossible to use non-global mappings.
 */
static bool __init ng_mappings_allowed(void)
{
	static const struct midr_range cavium_erratum_27456_cpus[] __initconst = {
		/* Cavium ThunderX, T88 pass 1.x - 2.1 */
		MIDR_RANGE(MIDR_THUNDERX, 0, 0, 1, 1),
		/* Cavium ThunderX, T81 pass 1.0 */
		MIDR_REV(MIDR_THUNDERX_81XX, 0, 0),
		{},
	};

	for (const struct midr_range *r = cavium_erratum_27456_cpus; r->model; r++) {
		if (midr_is_cpu_model_range(read_cpuid_id(), r->model,
					    r->rv_min, r->rv_max))
			return false;
	}

	return true;
}
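
/*
 * early_map_kernel() below is the asmlinkage entry point into this position
 * independent (PI) code: it maps the FDT, clears BSS and the initial page
 * tables, applies CPU feature overrides from /chosen, sizes the VA space
 * (switching the ID map to an LPA2 compatible layout if needed), derives the
 * KASLR offset, and finally creates and installs the kernel mapping via
 * map_kernel().
 */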

asmlinkage void __init early_map_kernel(u64 boot_status, phys_addr_t fdt)
{
	static char const chosen_str[] __initconst = "/chosen";
	u64 va_base, pa_base = (u64)&_text;
	u64 kaslr_offset = pa_base % MIN_KIMG_ALIGN;
	int root_level = 4 - CONFIG_PGTABLE_LEVELS;
	int va_bits = VA_BITS;
	int chosen;
	void *fdt_mapped = map_fdt(fdt);

	/* Clear BSS and the initial page tables */
	memset(__bss_start, 0, (char *)init_pg_end - (char *)__bss_start);

	/* Parse the command line for CPU feature overrides */
	chosen = fdt_path_offset(fdt_mapped, chosen_str);
	init_feature_override(boot_status, fdt_mapped, chosen);

	if (IS_ENABLED(CONFIG_ARM64_64K_PAGES) && !cpu_has_lva()) {
		va_bits = VA_BITS_MIN;
	} else if (IS_ENABLED(CONFIG_ARM64_LPA2) && !cpu_has_lpa2()) {
		va_bits = VA_BITS_MIN;
		root_level++;
	}

	if (va_bits > VA_BITS_MIN)
		sysreg_clear_set(tcr_el1, TCR_T1SZ_MASK, TCR_T1SZ(va_bits));

	/*
	 * The virtual KASLR displacement modulo 2MiB is decided by the
	 * physical placement of the image, as otherwise, we might not be able
	 * to create the early kernel mapping using 2 MiB block descriptors. So
	 * take the low bits of the KASLR offset from the physical address, and
	 * fill in the high bits from the seed.
	 */
	if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
		u64 kaslr_seed = kaslr_early_init(fdt_mapped, chosen);

		if (kaslr_seed && kaslr_requires_kpti())
			arm64_use_ng_mappings = ng_mappings_allowed();

		kaslr_offset |= kaslr_seed & ~(MIN_KIMG_ALIGN - 1);
	}

	if (IS_ENABLED(CONFIG_ARM64_LPA2) && va_bits > VA_BITS_MIN)
		remap_idmap_for_lpa2();

	va_base = KIMAGE_VADDR + kaslr_offset;
	map_kernel(kaslr_offset, va_base - pa_base, root_level);
}