// SPDX-License-Identifier: GPL-2.0
#define boot_fmt(fmt) "vmem: " fmt
#include <linux/cpufeature.h>
#include <linux/sched/task.h>
#include <linux/pgtable.h>
#include <linux/kasan.h>
#include <asm/page-states.h>
#include <asm/pgalloc.h>
#include <asm/facility.h>
#include <asm/sections.h>
#include <asm/ctlreg.h>
#include <asm/physmem_info.h>
#include <asm/maccess.h>
#include <asm/machine.h>
#include <asm/abs_lowcore.h>
#include "decompressor.h"
#include "boot.h"

struct ctlreg __bootdata_preserved(s390_invalid_asce);

#ifdef CONFIG_PROC_FS
atomic_long_t __bootdata_preserved(direct_pages_count[PG_DIRECT_MAP_MAX]);
#endif

#define init_mm		(*(struct mm_struct *)vmlinux.init_mm_off)
#define swapper_pg_dir	vmlinux.swapper_pg_dir_off
#define invalid_pg_dir	vmlinux.invalid_pg_dir_off

enum populate_mode {
	POPULATE_NONE,
	POPULATE_DIRECT,
	POPULATE_LOWCORE,
	POPULATE_ABS_LOWCORE,
	POPULATE_IDENTITY,
	POPULATE_KERNEL,
#ifdef CONFIG_KASAN
	/* KASAN modes should be last and grouped together, see is_kasan_populate_mode() */
	POPULATE_KASAN_MAP_SHADOW,
	POPULATE_KASAN_ZERO_SHADOW,
	POPULATE_KASAN_SHALLOW
#endif
};

#define POPULATE_MODE_NAME(t) case POPULATE_ ## t: return #t
static inline const char *get_populate_mode_name(enum populate_mode t)
{
	switch (t) {
	POPULATE_MODE_NAME(NONE);
	POPULATE_MODE_NAME(DIRECT);
	POPULATE_MODE_NAME(LOWCORE);
	POPULATE_MODE_NAME(ABS_LOWCORE);
	POPULATE_MODE_NAME(IDENTITY);
	POPULATE_MODE_NAME(KERNEL);
#ifdef CONFIG_KASAN
	POPULATE_MODE_NAME(KASAN_MAP_SHADOW);
	POPULATE_MODE_NAME(KASAN_ZERO_SHADOW);
	POPULATE_MODE_NAME(KASAN_SHALLOW);
#endif
	default:
		return "UNKNOWN";
	}
}

static bool is_kasan_populate_mode(enum populate_mode mode)
{
#ifdef CONFIG_KASAN
	return mode >= POPULATE_KASAN_MAP_SHADOW;
#else
	return false;
#endif
}

static void pgtable_populate(unsigned long addr, unsigned long end, enum populate_mode mode);

#ifdef CONFIG_KASAN

#define kasan_early_shadow_page	vmlinux.kasan_early_shadow_page_off
#define kasan_early_shadow_pte	((pte_t *)vmlinux.kasan_early_shadow_pte_off)
#define kasan_early_shadow_pmd	((pmd_t *)vmlinux.kasan_early_shadow_pmd_off)
#define kasan_early_shadow_pud	((pud_t *)vmlinux.kasan_early_shadow_pud_off)
#define kasan_early_shadow_p4d	((p4d_t *)vmlinux.kasan_early_shadow_p4d_off)
#define __sha(x)		((unsigned long)kasan_mem_to_shadow((void *)x))

static pte_t pte_z;

static inline void kasan_populate(unsigned long start, unsigned long end, enum populate_mode mode)
{
	unsigned long sha_start = PAGE_ALIGN_DOWN(__sha(start));
	unsigned long sha_end = PAGE_ALIGN(__sha(end));

	boot_debug("%-17s 0x%016lx-0x%016lx >> 0x%016lx-0x%016lx\n", get_populate_mode_name(mode),
		   start, end, sha_start, sha_end);
	pgtable_populate(sha_start, sha_end, mode);
}

static void kasan_populate_shadow(unsigned long kernel_start, unsigned long kernel_end)
{
	pmd_t pmd_z = __pmd(__pa(kasan_early_shadow_pte) | _SEGMENT_ENTRY);
	pud_t pud_z = __pud(__pa(kasan_early_shadow_pmd) | _REGION3_ENTRY);
	p4d_t p4d_z = __p4d(__pa(kasan_early_shadow_pud) | _REGION2_ENTRY);
	unsigned long memgap_start = 0;
	unsigned long start, end;
	int i;

	pte_z = __pte(__pa(kasan_early_shadow_page) | pgprot_val(PAGE_KERNEL_RO));
	crst_table_init((unsigned long *)kasan_early_shadow_p4d, p4d_val(p4d_z));
	crst_table_init((unsigned long *)kasan_early_shadow_pud, pud_val(pud_z));
	crst_table_init((unsigned long *)kasan_early_shadow_pmd, pmd_val(pmd_z));
	memset64((u64 *)kasan_early_shadow_pte, pte_val(pte_z), PTRS_PER_PTE);
	__arch_set_page_dat(kasan_early_shadow_p4d, 1UL << CRST_ALLOC_ORDER);
	__arch_set_page_dat(kasan_early_shadow_pud, 1UL << CRST_ALLOC_ORDER);
	__arch_set_page_dat(kasan_early_shadow_pmd, 1UL << CRST_ALLOC_ORDER);
	__arch_set_page_dat(kasan_early_shadow_pte, 1);

	for_each_physmem_usable_range(i, &start, &end) {
		kasan_populate((unsigned long)__identity_va(start),
			       (unsigned long)__identity_va(end),
			       POPULATE_KASAN_MAP_SHADOW);
		if (memgap_start && physmem_info.info_source == MEM_DETECT_DIAG260) {
			kasan_populate((unsigned long)__identity_va(memgap_start),
				       (unsigned long)__identity_va(start),
				       POPULATE_KASAN_ZERO_SHADOW);
		}
		memgap_start = end;
	}
	kasan_populate(kernel_start + TEXT_OFFSET, kernel_end, POPULATE_KASAN_MAP_SHADOW);
	kasan_populate(0, (unsigned long)__identity_va(0), POPULATE_KASAN_ZERO_SHADOW);
	kasan_populate(AMODE31_START, AMODE31_END, POPULATE_KASAN_ZERO_SHADOW);
	/* shallowly populate kasan shadow for vmalloc and modules */
	kasan_populate(VMALLOC_START, MODULES_END, POPULATE_KASAN_SHALLOW);
	/* populate kasan shadow for untracked memory */
	kasan_populate((unsigned long)__identity_va(ident_map_size), VMALLOC_START,
		       POPULATE_KASAN_ZERO_SHADOW);
	kasan_populate(kernel_end, _REGION1_SIZE, POPULATE_KASAN_ZERO_SHADOW);
}

static bool kasan_pgd_populate_zero_shadow(pgd_t *pgd, unsigned long addr,
					   unsigned long end, enum populate_mode mode)
{
	if (mode == POPULATE_KASAN_ZERO_SHADOW &&
	    IS_ALIGNED(addr, PGDIR_SIZE) && end - addr >= PGDIR_SIZE) {
		pgd_populate(&init_mm, pgd, kasan_early_shadow_p4d);
		return true;
	}
	return false;
}

static bool kasan_p4d_populate_zero_shadow(p4d_t *p4d, unsigned long addr,
					   unsigned long end, enum populate_mode mode)
{
	if (mode == POPULATE_KASAN_ZERO_SHADOW &&
	    IS_ALIGNED(addr, P4D_SIZE) && end - addr >= P4D_SIZE) {
		p4d_populate(&init_mm, p4d, kasan_early_shadow_pud);
		return true;
	}
	return false;
}

static bool kasan_pud_populate_zero_shadow(pud_t *pud, unsigned long addr,
					   unsigned long end, enum populate_mode mode)
{
	if (mode == POPULATE_KASAN_ZERO_SHADOW &&
	    IS_ALIGNED(addr, PUD_SIZE) && end - addr >= PUD_SIZE) {
		pud_populate(&init_mm, pud, kasan_early_shadow_pmd);
		return true;
	}
	return false;
}

static bool kasan_pmd_populate_zero_shadow(pmd_t *pmd, unsigned long addr,
					   unsigned long end, enum populate_mode mode)
{
	if (mode == POPULATE_KASAN_ZERO_SHADOW &&
	    IS_ALIGNED(addr, PMD_SIZE) && end - addr >= PMD_SIZE) {
		pmd_populate(&init_mm, pmd, kasan_early_shadow_pte);
		return true;
	}
	return false;
}

static bool kasan_pte_populate_zero_shadow(pte_t *pte, enum populate_mode mode)
{
	if (mode == POPULATE_KASAN_ZERO_SHADOW) {
		set_pte(pte, pte_z);
		return true;
	}
	return false;
}
#else

static inline void kasan_populate_shadow(unsigned long kernel_start, unsigned long kernel_end)
{
}

static inline bool kasan_pgd_populate_zero_shadow(pgd_t *pgd, unsigned long addr,
						  unsigned long end, enum populate_mode mode)
{
	return false;
}

static inline bool kasan_p4d_populate_zero_shadow(p4d_t *p4d, unsigned long addr,
						  unsigned long end, enum populate_mode mode)
{
	return false;
}

static inline bool kasan_pud_populate_zero_shadow(pud_t *pud, unsigned long addr,
						  unsigned long end, enum populate_mode mode)
{
	return false;
}

static inline bool kasan_pmd_populate_zero_shadow(pmd_t *pmd, unsigned long addr,
						  unsigned long end, enum populate_mode mode)
{
	return false;
}

static bool kasan_pte_populate_zero_shadow(pte_t *pte, enum populate_mode mode)
{
	return false;
}

#endif

/*
 * Mimic virt_to_kpte() in lack of init_mm symbol. Skip pmd NULL check though.
 */
static inline pte_t *__virt_to_kpte(unsigned long va)
{
	return pte_offset_kernel(pmd_offset(pud_offset(p4d_offset(pgd_offset_k(va), va), va), va), va);
}

static void *boot_crst_alloc(unsigned long val)
{
	unsigned long size = PAGE_SIZE << CRST_ALLOC_ORDER;
	unsigned long *table;

	table = (unsigned long *)physmem_alloc_or_die(RR_VMEM, size, size);
	crst_table_init(table, val);
	__arch_set_page_dat(table, 1UL << CRST_ALLOC_ORDER);
	return table;
}

static pte_t *boot_pte_alloc(void)
{
	pte_t *pte;

	pte = (void *)physmem_alloc_or_die(RR_VMEM, PAGE_SIZE, PAGE_SIZE);
	__arch_set_page_dat(pte, 1);
	memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE);
	return pte;
}

static unsigned long resolve_pa_may_alloc(unsigned long addr, unsigned long size,
					  enum populate_mode mode)
{
	switch (mode) {
	case POPULATE_NONE:
		return INVALID_PHYS_ADDR;
	case POPULATE_DIRECT:
		return addr;
	case POPULATE_LOWCORE:
		return __lowcore_pa(addr);
	case POPULATE_ABS_LOWCORE:
		return __abs_lowcore_pa(addr);
	case POPULATE_KERNEL:
		return __kernel_pa(addr);
	case POPULATE_IDENTITY:
		return __identity_pa(addr);
#ifdef CONFIG_KASAN
	case POPULATE_KASAN_MAP_SHADOW:
		/* Allow to fail large page allocations, this will fall back to 1mb/4k pages */
		addr = physmem_alloc(RR_VMEM, size, size, size == PAGE_SIZE);
		if (addr) {
			memset((void *)addr, 0, size);
			return addr;
		}
		return INVALID_PHYS_ADDR;
#endif
	default:
		return INVALID_PHYS_ADDR;
	}
}

static bool large_page_mapping_allowed(enum populate_mode mode)
{
	switch (mode) {
	case POPULATE_DIRECT:
	case POPULATE_IDENTITY:
	case POPULATE_KERNEL:
#ifdef CONFIG_KASAN
	case POPULATE_KASAN_MAP_SHADOW:
#endif
		return true;
	default:
		return false;
	}
}

static unsigned long try_get_large_pud_pa(pud_t *pu_dir, unsigned long addr, unsigned long end,
					  enum populate_mode mode)
{
	unsigned long pa, size = end - addr;

	if (!cpu_has_edat2() || !large_page_mapping_allowed(mode) ||
	    !IS_ALIGNED(addr, PUD_SIZE) || (size < PUD_SIZE))
		return INVALID_PHYS_ADDR;

	pa = resolve_pa_may_alloc(addr, size, mode);
	if (!IS_ALIGNED(pa, PUD_SIZE))
		return INVALID_PHYS_ADDR;

	return pa;
}

static unsigned long try_get_large_pmd_pa(pmd_t *pm_dir, unsigned long addr, unsigned long end,
					  enum populate_mode mode)
{
	unsigned long pa, size = end - addr;

	if (!cpu_has_edat1() || !large_page_mapping_allowed(mode) ||
	    !IS_ALIGNED(addr, PMD_SIZE) || (size < PMD_SIZE))
		return INVALID_PHYS_ADDR;

	pa = resolve_pa_may_alloc(addr, size, mode);
	if (!IS_ALIGNED(pa, PMD_SIZE))
		return INVALID_PHYS_ADDR;

	return pa;
}

static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long end,
				 enum populate_mode mode)
{
	unsigned long pages = 0;
	pte_t *pte, entry;

	pte = pte_offset_kernel(pmd, addr);
	for (; addr < end; addr += PAGE_SIZE, pte++) {
		if (pte_none(*pte)) {
			if (kasan_pte_populate_zero_shadow(pte, mode))
				continue;
			entry = __pte(resolve_pa_may_alloc(addr, PAGE_SIZE, mode));
			entry = set_pte_bit(entry, PAGE_KERNEL);
			set_pte(pte, entry);
			pages++;
		}
	}
	if (mode == POPULATE_IDENTITY)
		update_page_count(PG_DIRECT_MAP_4K, pages);
}

static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long end,
				 enum populate_mode mode)
{
	unsigned long pa, next, pages = 0;
	pmd_t *pmd, entry;
	pte_t *pte;

	pmd = pmd_offset(pud, addr);
	for (; addr < end; addr = next, pmd++) {
		next = pmd_addr_end(addr, end);
		if (pmd_none(*pmd)) {
			if (kasan_pmd_populate_zero_shadow(pmd, addr, next, mode))
				continue;
			pa = try_get_large_pmd_pa(pmd, addr, next, mode);
			if (pa != INVALID_PHYS_ADDR) {
				entry = __pmd(pa);
				entry = set_pmd_bit(entry, SEGMENT_KERNEL);
				set_pmd(pmd, entry);
				pages++;
				continue;
			}
			pte = boot_pte_alloc();
			pmd_populate(&init_mm, pmd, pte);
		} else if (pmd_leaf(*pmd)) {
			continue;
		}
		pgtable_pte_populate(pmd, addr, next, mode);
	}
	if (mode == POPULATE_IDENTITY)
		update_page_count(PG_DIRECT_MAP_1M, pages);
}

static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long end,
				 enum populate_mode mode)
{
	unsigned long pa, next, pages = 0;
	pud_t *pud, entry;
	pmd_t *pmd;

	pud = pud_offset(p4d, addr);
	for (; addr < end; addr = next, pud++) {
		next = pud_addr_end(addr, end);
		if (pud_none(*pud)) {
			if (kasan_pud_populate_zero_shadow(pud, addr, next, mode))
				continue;
			pa = try_get_large_pud_pa(pud, addr, next, mode);
			if (pa != INVALID_PHYS_ADDR) {
				entry = __pud(pa);
				entry = set_pud_bit(entry, REGION3_KERNEL);
				set_pud(pud, entry);
				pages++;
				continue;
			}
			pmd = boot_crst_alloc(_SEGMENT_ENTRY_EMPTY);
			pud_populate(&init_mm, pud, pmd);
		} else if (pud_leaf(*pud)) {
			continue;
		}
		pgtable_pmd_populate(pud, addr, next, mode);
	}
	if (mode == POPULATE_IDENTITY)
		update_page_count(PG_DIRECT_MAP_2G, pages);
}

static void pgtable_p4d_populate(pgd_t *pgd, unsigned long addr, unsigned long end,
				 enum populate_mode mode)
{
	unsigned long next;
	p4d_t *p4d;
	pud_t *pud;

	p4d = p4d_offset(pgd, addr);
	for (; addr < end; addr = next, p4d++) {
		next = p4d_addr_end(addr, end);
		if (p4d_none(*p4d)) {
			if (kasan_p4d_populate_zero_shadow(p4d, addr, next, mode))
				continue;
			pud = boot_crst_alloc(_REGION3_ENTRY_EMPTY);
			p4d_populate(&init_mm, p4d, pud);
		}
		pgtable_pud_populate(p4d, addr, next, mode);
	}
}

static void pgtable_populate(unsigned long addr, unsigned long end, enum populate_mode mode)
{
	unsigned long next;
	pgd_t *pgd;
	p4d_t *p4d;

	if (!is_kasan_populate_mode(mode)) {
		boot_debug("%-17s 0x%016lx-0x%016lx -> 0x%016lx-0x%016lx\n",
			   get_populate_mode_name(mode), addr, end,
			   resolve_pa_may_alloc(addr, 0, mode),
			   resolve_pa_may_alloc(end - 1, 0, mode) + 1);
	}

	pgd = pgd_offset(&init_mm, addr);
	for (; addr < end; addr = next, pgd++) {
		next = pgd_addr_end(addr, end);
		if (pgd_none(*pgd)) {
			if (kasan_pgd_populate_zero_shadow(pgd, addr, next, mode))
				continue;
			p4d = boot_crst_alloc(_REGION2_ENTRY_EMPTY);
			pgd_populate(&init_mm, pgd, p4d);
		}
#ifdef CONFIG_KASAN
		if (mode == POPULATE_KASAN_SHALLOW)
			continue;
#endif
		pgtable_p4d_populate(pgd, addr, next, mode);
	}
}

void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned long asce_limit)
{
	unsigned long lowcore_address = 0;
	unsigned long start, end;
	unsigned long asce_type;
	unsigned long asce_bits;
	pgd_t *init_mm_pgd;
	int i;

	/*
	 * Mark whole memory as no-dat. This must be done before any
	 * page tables are allocated, or kernel image builtin pages
	 * are marked as dat tables.
	 */
	for_each_physmem_online_range(i, &start, &end)
		__arch_set_page_nodat((void *)start, (end - start) >> PAGE_SHIFT);

	/*
	 * init_mm->pgd contains virtual address of swapper_pg_dir.
	 * It is unusable at this stage since DAT is yet off. Swap
	 * it for physical address of swapper_pg_dir and restore
	 * the virtual address after all page tables are created.
	 */
	init_mm_pgd = init_mm.pgd;
	init_mm.pgd = (pgd_t *)swapper_pg_dir;

	if (asce_limit == _REGION1_SIZE) {
		asce_type = _REGION2_ENTRY_EMPTY;
		asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH;
	} else {
		asce_type = _REGION3_ENTRY_EMPTY;
		asce_bits = _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
	}
	s390_invalid_asce.val = invalid_pg_dir | _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;

	crst_table_init((unsigned long *)swapper_pg_dir, asce_type);
	crst_table_init((unsigned long *)invalid_pg_dir, _REGION3_ENTRY_EMPTY);
	__arch_set_page_dat((void *)swapper_pg_dir, 1UL << CRST_ALLOC_ORDER);
	__arch_set_page_dat((void *)invalid_pg_dir, 1UL << CRST_ALLOC_ORDER);

	if (machine_has_relocated_lowcore())
		lowcore_address = LOWCORE_ALT_ADDRESS;

	/*
	 * To allow prefixing the lowcore must be mapped with 4KB pages.
	 * To prevent creation of a large page at address 0 first map
	 * the lowcore and create the identity mapping only afterwards.
	 */
	pgtable_populate(lowcore_address,
			 lowcore_address + sizeof(struct lowcore),
			 POPULATE_LOWCORE);
	for_each_physmem_usable_range(i, &start, &end) {
		/* Do not map lowcore with identity mapping */
		if (!start)
			start = sizeof(struct lowcore);
		pgtable_populate((unsigned long)__identity_va(start),
				 (unsigned long)__identity_va(end),
				 POPULATE_IDENTITY);
	}

	/*
	 * [kernel_start..kernel_start + TEXT_OFFSET] region is never
	 * accessed as per the linker script:
	 *
	 *	. = TEXT_OFFSET;
	 *
	 * Therefore, skip mapping TEXT_OFFSET bytes to prevent access to
	 * [__kaslr_offset_phys..__kaslr_offset_phys + TEXT_OFFSET] region.
	 */
	pgtable_populate(kernel_start + TEXT_OFFSET, kernel_end, POPULATE_KERNEL);
	pgtable_populate(AMODE31_START, AMODE31_END, POPULATE_DIRECT);
	pgtable_populate(__abs_lowcore, __abs_lowcore + sizeof(struct lowcore),
			 POPULATE_ABS_LOWCORE);
	pgtable_populate(__memcpy_real_area, __memcpy_real_area + PAGE_SIZE,
			 POPULATE_NONE);
	memcpy_real_ptep = __identity_va(__virt_to_kpte(__memcpy_real_area));

	kasan_populate_shadow(kernel_start, kernel_end);

	get_lowcore()->kernel_asce.val = swapper_pg_dir | asce_bits;
	get_lowcore()->user_asce = s390_invalid_asce;

	local_ctl_load(1, &get_lowcore()->kernel_asce);
	local_ctl_load(7, &get_lowcore()->user_asce);
	local_ctl_load(13, &get_lowcore()->kernel_asce);

	init_mm.context.asce = get_lowcore()->kernel_asce.val;
	init_mm.pgd = init_mm_pgd;
}