1 // SPDX-License-Identifier: GPL-2.0 2 #include <linux/sched/task.h> 3 #include <linux/pgtable.h> 4 #include <linux/kasan.h> 5 #include <asm/pgalloc.h> 6 #include <asm/facility.h> 7 #include <asm/sections.h> 8 #include <asm/ctlreg.h> 9 #include <asm/physmem_info.h> 10 #include <asm/maccess.h> 11 #include <asm/abs_lowcore.h> 12 #include "decompressor.h" 13 #include "boot.h" 14 15 struct ctlreg __bootdata_preserved(s390_invalid_asce); 16 17 #ifdef CONFIG_PROC_FS 18 atomic_long_t __bootdata_preserved(direct_pages_count[PG_DIRECT_MAP_MAX]); 19 #endif 20 21 #define init_mm (*(struct mm_struct *)vmlinux.init_mm_off) 22 #define swapper_pg_dir vmlinux.swapper_pg_dir_off 23 #define invalid_pg_dir vmlinux.invalid_pg_dir_off 24 25 enum populate_mode { 26 POPULATE_NONE, 27 POPULATE_DIRECT, 28 POPULATE_ABS_LOWCORE, 29 #ifdef CONFIG_KASAN 30 POPULATE_KASAN_MAP_SHADOW, 31 POPULATE_KASAN_ZERO_SHADOW, 32 POPULATE_KASAN_SHALLOW 33 #endif 34 }; 35 36 static void pgtable_populate(unsigned long addr, unsigned long end, enum populate_mode mode); 37 38 #ifdef CONFIG_KASAN 39 40 #define kasan_early_shadow_page vmlinux.kasan_early_shadow_page_off 41 #define kasan_early_shadow_pte ((pte_t *)vmlinux.kasan_early_shadow_pte_off) 42 #define kasan_early_shadow_pmd ((pmd_t *)vmlinux.kasan_early_shadow_pmd_off) 43 #define kasan_early_shadow_pud ((pud_t *)vmlinux.kasan_early_shadow_pud_off) 44 #define kasan_early_shadow_p4d ((p4d_t *)vmlinux.kasan_early_shadow_p4d_off) 45 #define __sha(x) ((unsigned long)kasan_mem_to_shadow((void *)x)) 46 47 static pte_t pte_z; 48 49 static inline void kasan_populate(unsigned long start, unsigned long end, enum populate_mode mode) 50 { 51 start = PAGE_ALIGN_DOWN(__sha(start)); 52 end = PAGE_ALIGN(__sha(end)); 53 pgtable_populate(start, end, mode); 54 } 55 56 static void kasan_populate_shadow(void) 57 { 58 pmd_t pmd_z = __pmd(__pa(kasan_early_shadow_pte) | _SEGMENT_ENTRY); 59 pud_t pud_z = __pud(__pa(kasan_early_shadow_pmd) | _REGION3_ENTRY); 60 p4d_t p4d_z = __p4d(__pa(kasan_early_shadow_pud) | _REGION2_ENTRY); 61 unsigned long memgap_start = 0; 62 unsigned long untracked_end; 63 unsigned long start, end; 64 int i; 65 66 pte_z = __pte(__pa(kasan_early_shadow_page) | pgprot_val(PAGE_KERNEL_RO)); 67 if (!machine.has_nx) 68 pte_z = clear_pte_bit(pte_z, __pgprot(_PAGE_NOEXEC)); 69 crst_table_init((unsigned long *)kasan_early_shadow_p4d, p4d_val(p4d_z)); 70 crst_table_init((unsigned long *)kasan_early_shadow_pud, pud_val(pud_z)); 71 crst_table_init((unsigned long *)kasan_early_shadow_pmd, pmd_val(pmd_z)); 72 memset64((u64 *)kasan_early_shadow_pte, pte_val(pte_z), PTRS_PER_PTE); 73 74 /* 75 * Current memory layout: 76 * +- 0 -------------+ +- shadow start -+ 77 * |1:1 ident mapping| /|1/8 of ident map| 78 * | | / | | 79 * +-end of ident map+ / +----------------+ 80 * | ... gap ... | / | kasan | 81 * | | / | zero page | 82 * +- vmalloc area -+ / | mapping | 83 * | vmalloc_size | / | (untracked) | 84 * +- modules vaddr -+ / +----------------+ 85 * | 2Gb |/ | unmapped | allocated per module 86 * +- shadow start -+ +----------------+ 87 * | 1/8 addr space | | zero pg mapping| (untracked) 88 * +- shadow end ----+---------+- shadow end ---+ 89 * 90 * Current memory layout (KASAN_VMALLOC): 91 * +- 0 -------------+ +- shadow start -+ 92 * |1:1 ident mapping| /|1/8 of ident map| 93 * | | / | | 94 * +-end of ident map+ / +----------------+ 95 * | ... gap ... | / | kasan zero page| (untracked) 96 * | | / | mapping | 97 * +- vmalloc area -+ / +----------------+ 98 * | vmalloc_size | / |shallow populate| 99 * +- modules vaddr -+ / +----------------+ 100 * | 2Gb |/ |shallow populate| 101 * +- shadow start -+ +----------------+ 102 * | 1/8 addr space | | zero pg mapping| (untracked) 103 * +- shadow end ----+---------+- shadow end ---+ 104 */ 105 106 for_each_physmem_usable_range(i, &start, &end) { 107 kasan_populate(start, end, POPULATE_KASAN_MAP_SHADOW); 108 if (memgap_start && physmem_info.info_source == MEM_DETECT_DIAG260) 109 kasan_populate(memgap_start, start, POPULATE_KASAN_ZERO_SHADOW); 110 memgap_start = end; 111 } 112 if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) { 113 untracked_end = VMALLOC_START; 114 /* shallowly populate kasan shadow for vmalloc and modules */ 115 kasan_populate(VMALLOC_START, MODULES_END, POPULATE_KASAN_SHALLOW); 116 } else { 117 untracked_end = MODULES_VADDR; 118 } 119 /* populate kasan shadow for untracked memory */ 120 kasan_populate(ident_map_size, untracked_end, POPULATE_KASAN_ZERO_SHADOW); 121 kasan_populate(MODULES_END, _REGION1_SIZE, POPULATE_KASAN_ZERO_SHADOW); 122 } 123 124 static bool kasan_pgd_populate_zero_shadow(pgd_t *pgd, unsigned long addr, 125 unsigned long end, enum populate_mode mode) 126 { 127 if (mode == POPULATE_KASAN_ZERO_SHADOW && 128 IS_ALIGNED(addr, PGDIR_SIZE) && end - addr >= PGDIR_SIZE) { 129 pgd_populate(&init_mm, pgd, kasan_early_shadow_p4d); 130 return true; 131 } 132 return false; 133 } 134 135 static bool kasan_p4d_populate_zero_shadow(p4d_t *p4d, unsigned long addr, 136 unsigned long end, enum populate_mode mode) 137 { 138 if (mode == POPULATE_KASAN_ZERO_SHADOW && 139 IS_ALIGNED(addr, P4D_SIZE) && end - addr >= P4D_SIZE) { 140 p4d_populate(&init_mm, p4d, kasan_early_shadow_pud); 141 return true; 142 } 143 return false; 144 } 145 146 static bool kasan_pud_populate_zero_shadow(pud_t *pud, unsigned long addr, 147 unsigned long end, enum populate_mode mode) 148 { 149 if (mode == POPULATE_KASAN_ZERO_SHADOW && 150 IS_ALIGNED(addr, PUD_SIZE) && end - addr >= PUD_SIZE) { 151 pud_populate(&init_mm, pud, kasan_early_shadow_pmd); 152 return true; 153 } 154 return false; 155 } 156 157 static bool kasan_pmd_populate_zero_shadow(pmd_t *pmd, unsigned long addr, 158 unsigned long end, enum populate_mode mode) 159 { 160 if (mode == POPULATE_KASAN_ZERO_SHADOW && 161 IS_ALIGNED(addr, PMD_SIZE) && end - addr >= PMD_SIZE) { 162 pmd_populate(&init_mm, pmd, kasan_early_shadow_pte); 163 return true; 164 } 165 return false; 166 } 167 168 static bool kasan_pte_populate_zero_shadow(pte_t *pte, enum populate_mode mode) 169 { 170 if (mode == POPULATE_KASAN_ZERO_SHADOW) { 171 set_pte(pte, pte_z); 172 return true; 173 } 174 return false; 175 } 176 #else 177 178 static inline void kasan_populate_shadow(void) {} 179 180 static inline bool kasan_pgd_populate_zero_shadow(pgd_t *pgd, unsigned long addr, 181 unsigned long end, enum populate_mode mode) 182 { 183 return false; 184 } 185 186 static inline bool kasan_p4d_populate_zero_shadow(p4d_t *p4d, unsigned long addr, 187 unsigned long end, enum populate_mode mode) 188 { 189 return false; 190 } 191 192 static inline bool kasan_pud_populate_zero_shadow(pud_t *pud, unsigned long addr, 193 unsigned long end, enum populate_mode mode) 194 { 195 return false; 196 } 197 198 static inline bool kasan_pmd_populate_zero_shadow(pmd_t *pmd, unsigned long addr, 199 unsigned long end, enum populate_mode mode) 200 { 201 return false; 202 } 203 204 static bool kasan_pte_populate_zero_shadow(pte_t *pte, enum populate_mode mode) 205 { 206 return false; 207 } 208 209 #endif 210 211 /* 212 * Mimic virt_to_kpte() in lack of init_mm symbol. Skip pmd NULL check though. 213 */ 214 static inline pte_t *__virt_to_kpte(unsigned long va) 215 { 216 return pte_offset_kernel(pmd_offset(pud_offset(p4d_offset(pgd_offset_k(va), va), va), va), va); 217 } 218 219 static void *boot_crst_alloc(unsigned long val) 220 { 221 unsigned long size = PAGE_SIZE << CRST_ALLOC_ORDER; 222 unsigned long *table; 223 224 table = (unsigned long *)physmem_alloc_top_down(RR_VMEM, size, size); 225 crst_table_init(table, val); 226 return table; 227 } 228 229 static pte_t *boot_pte_alloc(void) 230 { 231 static void *pte_leftover; 232 pte_t *pte; 233 234 /* 235 * handling pte_leftovers this way helps to avoid memory fragmentation 236 * during POPULATE_KASAN_MAP_SHADOW when EDAT is off 237 */ 238 if (!pte_leftover) { 239 pte_leftover = (void *)physmem_alloc_top_down(RR_VMEM, PAGE_SIZE, PAGE_SIZE); 240 pte = pte_leftover + _PAGE_TABLE_SIZE; 241 } else { 242 pte = pte_leftover; 243 pte_leftover = NULL; 244 } 245 246 memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE); 247 return pte; 248 } 249 250 static unsigned long _pa(unsigned long addr, unsigned long size, enum populate_mode mode) 251 { 252 switch (mode) { 253 case POPULATE_NONE: 254 return -1; 255 case POPULATE_DIRECT: 256 return addr; 257 case POPULATE_ABS_LOWCORE: 258 return __abs_lowcore_pa(addr); 259 #ifdef CONFIG_KASAN 260 case POPULATE_KASAN_MAP_SHADOW: 261 addr = physmem_alloc_top_down(RR_VMEM, size, size); 262 memset((void *)addr, 0, size); 263 return addr; 264 #endif 265 default: 266 return -1; 267 } 268 } 269 270 static bool can_large_pud(pud_t *pu_dir, unsigned long addr, unsigned long end) 271 { 272 return machine.has_edat2 && 273 IS_ALIGNED(addr, PUD_SIZE) && (end - addr) >= PUD_SIZE; 274 } 275 276 static bool can_large_pmd(pmd_t *pm_dir, unsigned long addr, unsigned long end) 277 { 278 return machine.has_edat1 && 279 IS_ALIGNED(addr, PMD_SIZE) && (end - addr) >= PMD_SIZE; 280 } 281 282 static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long end, 283 enum populate_mode mode) 284 { 285 unsigned long pages = 0; 286 pte_t *pte, entry; 287 288 pte = pte_offset_kernel(pmd, addr); 289 for (; addr < end; addr += PAGE_SIZE, pte++) { 290 if (pte_none(*pte)) { 291 if (kasan_pte_populate_zero_shadow(pte, mode)) 292 continue; 293 entry = __pte(_pa(addr, PAGE_SIZE, mode)); 294 entry = set_pte_bit(entry, PAGE_KERNEL); 295 if (!machine.has_nx) 296 entry = clear_pte_bit(entry, __pgprot(_PAGE_NOEXEC)); 297 set_pte(pte, entry); 298 pages++; 299 } 300 } 301 if (mode == POPULATE_DIRECT) 302 update_page_count(PG_DIRECT_MAP_4K, pages); 303 } 304 305 static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long end, 306 enum populate_mode mode) 307 { 308 unsigned long next, pages = 0; 309 pmd_t *pmd, entry; 310 pte_t *pte; 311 312 pmd = pmd_offset(pud, addr); 313 for (; addr < end; addr = next, pmd++) { 314 next = pmd_addr_end(addr, end); 315 if (pmd_none(*pmd)) { 316 if (kasan_pmd_populate_zero_shadow(pmd, addr, next, mode)) 317 continue; 318 if (can_large_pmd(pmd, addr, next)) { 319 entry = __pmd(_pa(addr, _SEGMENT_SIZE, mode)); 320 entry = set_pmd_bit(entry, SEGMENT_KERNEL); 321 if (!machine.has_nx) 322 entry = clear_pmd_bit(entry, __pgprot(_SEGMENT_ENTRY_NOEXEC)); 323 set_pmd(pmd, entry); 324 pages++; 325 continue; 326 } 327 pte = boot_pte_alloc(); 328 pmd_populate(&init_mm, pmd, pte); 329 } else if (pmd_large(*pmd)) { 330 continue; 331 } 332 pgtable_pte_populate(pmd, addr, next, mode); 333 } 334 if (mode == POPULATE_DIRECT) 335 update_page_count(PG_DIRECT_MAP_1M, pages); 336 } 337 338 static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long end, 339 enum populate_mode mode) 340 { 341 unsigned long next, pages = 0; 342 pud_t *pud, entry; 343 pmd_t *pmd; 344 345 pud = pud_offset(p4d, addr); 346 for (; addr < end; addr = next, pud++) { 347 next = pud_addr_end(addr, end); 348 if (pud_none(*pud)) { 349 if (kasan_pud_populate_zero_shadow(pud, addr, next, mode)) 350 continue; 351 if (can_large_pud(pud, addr, next)) { 352 entry = __pud(_pa(addr, _REGION3_SIZE, mode)); 353 entry = set_pud_bit(entry, REGION3_KERNEL); 354 if (!machine.has_nx) 355 entry = clear_pud_bit(entry, __pgprot(_REGION_ENTRY_NOEXEC)); 356 set_pud(pud, entry); 357 pages++; 358 continue; 359 } 360 pmd = boot_crst_alloc(_SEGMENT_ENTRY_EMPTY); 361 pud_populate(&init_mm, pud, pmd); 362 } else if (pud_large(*pud)) { 363 continue; 364 } 365 pgtable_pmd_populate(pud, addr, next, mode); 366 } 367 if (mode == POPULATE_DIRECT) 368 update_page_count(PG_DIRECT_MAP_2G, pages); 369 } 370 371 static void pgtable_p4d_populate(pgd_t *pgd, unsigned long addr, unsigned long end, 372 enum populate_mode mode) 373 { 374 unsigned long next; 375 p4d_t *p4d; 376 pud_t *pud; 377 378 p4d = p4d_offset(pgd, addr); 379 for (; addr < end; addr = next, p4d++) { 380 next = p4d_addr_end(addr, end); 381 if (p4d_none(*p4d)) { 382 if (kasan_p4d_populate_zero_shadow(p4d, addr, next, mode)) 383 continue; 384 pud = boot_crst_alloc(_REGION3_ENTRY_EMPTY); 385 p4d_populate(&init_mm, p4d, pud); 386 } 387 pgtable_pud_populate(p4d, addr, next, mode); 388 } 389 } 390 391 static void pgtable_populate(unsigned long addr, unsigned long end, enum populate_mode mode) 392 { 393 unsigned long next; 394 pgd_t *pgd; 395 p4d_t *p4d; 396 397 pgd = pgd_offset(&init_mm, addr); 398 for (; addr < end; addr = next, pgd++) { 399 next = pgd_addr_end(addr, end); 400 if (pgd_none(*pgd)) { 401 if (kasan_pgd_populate_zero_shadow(pgd, addr, next, mode)) 402 continue; 403 p4d = boot_crst_alloc(_REGION2_ENTRY_EMPTY); 404 pgd_populate(&init_mm, pgd, p4d); 405 } 406 #ifdef CONFIG_KASAN 407 if (mode == POPULATE_KASAN_SHALLOW) 408 continue; 409 #endif 410 pgtable_p4d_populate(pgd, addr, next, mode); 411 } 412 } 413 414 void setup_vmem(unsigned long asce_limit) 415 { 416 unsigned long start, end; 417 unsigned long asce_type; 418 unsigned long asce_bits; 419 int i; 420 421 if (asce_limit == _REGION1_SIZE) { 422 asce_type = _REGION2_ENTRY_EMPTY; 423 asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH; 424 } else { 425 asce_type = _REGION3_ENTRY_EMPTY; 426 asce_bits = _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH; 427 } 428 s390_invalid_asce.val = invalid_pg_dir | _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH; 429 430 crst_table_init((unsigned long *)swapper_pg_dir, asce_type); 431 crst_table_init((unsigned long *)invalid_pg_dir, _REGION3_ENTRY_EMPTY); 432 433 /* 434 * To allow prefixing the lowcore must be mapped with 4KB pages. 435 * To prevent creation of a large page at address 0 first map 436 * the lowcore and create the identity mapping only afterwards. 437 */ 438 pgtable_populate(0, sizeof(struct lowcore), POPULATE_DIRECT); 439 for_each_physmem_usable_range(i, &start, &end) 440 pgtable_populate(start, end, POPULATE_DIRECT); 441 pgtable_populate(__abs_lowcore, __abs_lowcore + sizeof(struct lowcore), 442 POPULATE_ABS_LOWCORE); 443 pgtable_populate(__memcpy_real_area, __memcpy_real_area + PAGE_SIZE, 444 POPULATE_NONE); 445 memcpy_real_ptep = __virt_to_kpte(__memcpy_real_area); 446 447 kasan_populate_shadow(); 448 449 S390_lowcore.kernel_asce.val = swapper_pg_dir | asce_bits; 450 S390_lowcore.user_asce = s390_invalid_asce; 451 452 local_ctl_load(1, &S390_lowcore.kernel_asce); 453 local_ctl_load(7, &S390_lowcore.user_asce); 454 local_ctl_load(13, &S390_lowcore.kernel_asce); 455 456 init_mm.context.asce = S390_lowcore.kernel_asce.val; 457 } 458