// SPDX-License-Identifier: GPL-2.0
#define boot_fmt(fmt) "vmem: " fmt
#include <linux/cpufeature.h>
#include <linux/sched/task.h>
#include <linux/pgtable.h>
#include <linux/kasan.h>
#include <asm/page-states.h>
#include <asm/pgalloc.h>
#include <asm/facility.h>
#include <asm/sections.h>
#include <asm/ctlreg.h>
#include <asm/physmem_info.h>
#include <asm/maccess.h>
#include <asm/machine.h>
#include <asm/abs_lowcore.h>
#include "decompressor.h"
#include "boot.h"

struct ctlreg __bootdata_preserved(s390_invalid_asce);

#ifdef CONFIG_PROC_FS
atomic_long_t __bootdata_preserved(direct_pages_count[PG_DIRECT_MAP_MAX]);
#endif

#define init_mm		(*(struct mm_struct *)vmlinux.init_mm_off)
#define swapper_pg_dir	vmlinux.swapper_pg_dir_off
#define invalid_pg_dir	vmlinux.invalid_pg_dir_off

enum populate_mode {
	POPULATE_NONE,
	POPULATE_DIRECT,
	POPULATE_LOWCORE,
	POPULATE_ABS_LOWCORE,
	POPULATE_IDENTITY,
	POPULATE_KERNEL,
#ifdef CONFIG_KASAN
	/* KASAN modes should be last and grouped together, see is_kasan_populate_mode() */
	POPULATE_KASAN_MAP_SHADOW,
	POPULATE_KASAN_ZERO_SHADOW,
	POPULATE_KASAN_SHALLOW
#endif
};

#define POPULATE_MODE_NAME(t) case POPULATE_ ## t: return #t
static inline const char *get_populate_mode_name(enum populate_mode t)
{
	switch (t) {
	POPULATE_MODE_NAME(NONE);
	POPULATE_MODE_NAME(DIRECT);
	POPULATE_MODE_NAME(LOWCORE);
	POPULATE_MODE_NAME(ABS_LOWCORE);
	POPULATE_MODE_NAME(IDENTITY);
	POPULATE_MODE_NAME(KERNEL);
#ifdef CONFIG_KASAN
	POPULATE_MODE_NAME(KASAN_MAP_SHADOW);
	POPULATE_MODE_NAME(KASAN_ZERO_SHADOW);
	POPULATE_MODE_NAME(KASAN_SHALLOW);
#endif
	default:
		return "UNKNOWN";
	}
}

static bool is_kasan_populate_mode(enum populate_mode mode)
{
#ifdef CONFIG_KASAN
	return mode >= POPULATE_KASAN_MAP_SHADOW;
#else
	return false;
#endif
}

static void pgtable_populate(unsigned long addr, unsigned long end, enum populate_mode mode);

#ifdef CONFIG_KASAN

#define kasan_early_shadow_page	vmlinux.kasan_early_shadow_page_off
#define kasan_early_shadow_pte	((pte_t *)vmlinux.kasan_early_shadow_pte_off)
#define kasan_early_shadow_pmd	((pmd_t *)vmlinux.kasan_early_shadow_pmd_off)
#define kasan_early_shadow_pud	((pud_t *)vmlinux.kasan_early_shadow_pud_off)
#define kasan_early_shadow_p4d	((p4d_t *)vmlinux.kasan_early_shadow_p4d_off)
#define __sha(x)		((unsigned long)kasan_mem_to_shadow((void *)x))

static pte_t pte_z;

static inline void kasan_populate(unsigned long start, unsigned long end, enum populate_mode mode)
{
	unsigned long sha_start = PAGE_ALIGN_DOWN(__sha(start));
	unsigned long sha_end = PAGE_ALIGN(__sha(end));

	boot_debug("%-17s 0x%016lx-0x%016lx >> 0x%016lx-0x%016lx\n", get_populate_mode_name(mode),
		   start, end, sha_start, sha_end);
	pgtable_populate(sha_start, sha_end, mode);
}

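/*
 * Populate the KASAN shadow for the decompressed kernel: set up the
 * early shadow tables so that unpopulated shadow regions resolve to the
 * shared read-only zero shadow page, map real shadow memory for all
 * usable physical memory ranges and for the kernel image, and cover the
 * rest of the address space with zero shadow. The vmalloc and modules
 * areas are only populated shallowly (upper-level tables only).
 */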
static void kasan_populate_shadow(unsigned long kernel_start, unsigned long kernel_end)
{
	pmd_t pmd_z = __pmd(__pa(kasan_early_shadow_pte) | _SEGMENT_ENTRY);
	pud_t pud_z = __pud(__pa(kasan_early_shadow_pmd) | _REGION3_ENTRY);
	p4d_t p4d_z = __p4d(__pa(kasan_early_shadow_pud) | _REGION2_ENTRY);
	unsigned long memgap_start = 0;
	unsigned long start, end;
	int i;

	pte_z = __pte(__pa(kasan_early_shadow_page) | pgprot_val(PAGE_KERNEL_RO));
	crst_table_init((unsigned long *)kasan_early_shadow_p4d, p4d_val(p4d_z));
	crst_table_init((unsigned long *)kasan_early_shadow_pud, pud_val(pud_z));
	crst_table_init((unsigned long *)kasan_early_shadow_pmd, pmd_val(pmd_z));
	memset64((u64 *)kasan_early_shadow_pte, pte_val(pte_z), PTRS_PER_PTE);
	__arch_set_page_dat(kasan_early_shadow_p4d, 1UL << CRST_ALLOC_ORDER);
	__arch_set_page_dat(kasan_early_shadow_pud, 1UL << CRST_ALLOC_ORDER);
	__arch_set_page_dat(kasan_early_shadow_pmd, 1UL << CRST_ALLOC_ORDER);
	__arch_set_page_dat(kasan_early_shadow_pte, 1);

	for_each_physmem_usable_range(i, &start, &end) {
		kasan_populate((unsigned long)__identity_va(start),
			       (unsigned long)__identity_va(end),
			       POPULATE_KASAN_MAP_SHADOW);
		if (memgap_start && physmem_info.info_source == MEM_DETECT_DIAG260) {
			kasan_populate((unsigned long)__identity_va(memgap_start),
				       (unsigned long)__identity_va(start),
				       POPULATE_KASAN_ZERO_SHADOW);
		}
		memgap_start = end;
	}
	kasan_populate(kernel_start + TEXT_OFFSET, kernel_end, POPULATE_KASAN_MAP_SHADOW);
	kasan_populate(0, (unsigned long)__identity_va(0), POPULATE_KASAN_ZERO_SHADOW);
	kasan_populate(AMODE31_START, AMODE31_END, POPULATE_KASAN_ZERO_SHADOW);
	/* shallowly populate kasan shadow for vmalloc and modules */
	kasan_populate(VMALLOC_START, MODULES_END, POPULATE_KASAN_SHALLOW);
	/* populate kasan shadow for untracked memory */
	kasan_populate((unsigned long)__identity_va(ident_map_size), VMALLOC_START,
		       POPULATE_KASAN_ZERO_SHADOW);
	kasan_populate(kernel_end, _REGION1_SIZE, POPULATE_KASAN_ZERO_SHADOW);
}

static bool kasan_pgd_populate_zero_shadow(pgd_t *pgd, unsigned long addr,
					   unsigned long end, enum populate_mode mode)
{
	if (mode == POPULATE_KASAN_ZERO_SHADOW &&
	    IS_ALIGNED(addr, PGDIR_SIZE) && end - addr >= PGDIR_SIZE) {
		pgd_populate(&init_mm, pgd, kasan_early_shadow_p4d);
		return true;
	}
	return false;
}

static bool kasan_p4d_populate_zero_shadow(p4d_t *p4d, unsigned long addr,
					   unsigned long end, enum populate_mode mode)
{
	if (mode == POPULATE_KASAN_ZERO_SHADOW &&
	    IS_ALIGNED(addr, P4D_SIZE) && end - addr >= P4D_SIZE) {
		p4d_populate(&init_mm, p4d, kasan_early_shadow_pud);
		return true;
	}
	return false;
}

static bool kasan_pud_populate_zero_shadow(pud_t *pud, unsigned long addr,
					   unsigned long end, enum populate_mode mode)
{
	if (mode == POPULATE_KASAN_ZERO_SHADOW &&
	    IS_ALIGNED(addr, PUD_SIZE) && end - addr >= PUD_SIZE) {
		pud_populate(&init_mm, pud, kasan_early_shadow_pmd);
		return true;
	}
	return false;
}

static bool kasan_pmd_populate_zero_shadow(pmd_t *pmd, unsigned long addr,
					   unsigned long end, enum populate_mode mode)
{
	if (mode == POPULATE_KASAN_ZERO_SHADOW &&
	    IS_ALIGNED(addr, PMD_SIZE) && end - addr >= PMD_SIZE) {
		pmd_populate(&init_mm, pmd, kasan_early_shadow_pte);
		return true;
	}
	return false;
}

static bool kasan_pte_populate_zero_shadow(pte_t *pte, enum populate_mode mode)
{
	if (mode == POPULATE_KASAN_ZERO_SHADOW) {
		set_pte(pte, pte_z);
		return true;
	}
	return false;
}
#else

static inline void kasan_populate_shadow(unsigned long kernel_start, unsigned long kernel_end)
{
}

static inline bool kasan_pgd_populate_zero_shadow(pgd_t *pgd, unsigned long addr,
						  unsigned long end, enum populate_mode mode)
{
	return false;
}

static inline bool kasan_p4d_populate_zero_shadow(p4d_t *p4d, unsigned long addr,
						  unsigned long end, enum populate_mode mode)
{
	return false;
}

static inline bool kasan_pud_populate_zero_shadow(pud_t *pud, unsigned long addr,
						  unsigned long end, enum populate_mode mode)
{
	return false;
}

static inline bool kasan_pmd_populate_zero_shadow(pmd_t *pmd, unsigned long addr,
						  unsigned long end, enum populate_mode mode)
{
	return false;
}

static bool kasan_pte_populate_zero_shadow(pte_t *pte, enum populate_mode mode)
{
	return false;
}

#endif

/*
 * Mimic virt_to_kpte() in the absence of the init_mm symbol. Skip the pmd NULL check though.
 */
static inline pte_t *__virt_to_kpte(unsigned long va)
{
	return pte_offset_kernel(pmd_offset(pud_offset(p4d_offset(pgd_offset_k(va), va), va), va), va);
}

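/*
 * Boot-time page table allocators. Both allocate from the RR_VMEM
 * reservation and mark the resulting pages as dat tables:
 * boot_crst_alloc() returns an initialized region/segment (CRST) table,
 * boot_pte_alloc() returns an invalidated page table.
 */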
static void *boot_crst_alloc(unsigned long val)
{
	unsigned long size = PAGE_SIZE << CRST_ALLOC_ORDER;
	unsigned long *table;

	table = (unsigned long *)physmem_alloc_or_die(RR_VMEM, size, size);
	crst_table_init(table, val);
	__arch_set_page_dat(table, 1UL << CRST_ALLOC_ORDER);
	return table;
}

static pte_t *boot_pte_alloc(void)
{
	static void *pte_leftover;
	pte_t *pte;

	/*
	 * handling pte_leftovers this way helps to avoid memory fragmentation
	 * during POPULATE_KASAN_MAP_SHADOW when EDAT is off
	 */
	if (!pte_leftover) {
		pte_leftover = (void *)physmem_alloc_or_die(RR_VMEM, PAGE_SIZE, PAGE_SIZE);
		pte = pte_leftover + _PAGE_TABLE_SIZE;
		__arch_set_page_dat(pte, 1);
	} else {
		pte = pte_leftover;
		pte_leftover = NULL;
	}

	memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE);
	return pte;
}

static unsigned long resolve_pa_may_alloc(unsigned long addr, unsigned long size,
					  enum populate_mode mode)
{
	switch (mode) {
	case POPULATE_NONE:
		return INVALID_PHYS_ADDR;
	case POPULATE_DIRECT:
		return addr;
	case POPULATE_LOWCORE:
		return __lowcore_pa(addr);
	case POPULATE_ABS_LOWCORE:
		return __abs_lowcore_pa(addr);
	case POPULATE_KERNEL:
		return __kernel_pa(addr);
	case POPULATE_IDENTITY:
		return __identity_pa(addr);
#ifdef CONFIG_KASAN
	case POPULATE_KASAN_MAP_SHADOW:
		/* Large page allocations are allowed to fail, in which case 1MB/4KB pages are used instead */
		addr = physmem_alloc(RR_VMEM, size, size, size == PAGE_SIZE);
		if (addr) {
			memset((void *)addr, 0, size);
			return addr;
		}
		return INVALID_PHYS_ADDR;
#endif
	default:
		return INVALID_PHYS_ADDR;
	}
}

static bool large_page_mapping_allowed(enum populate_mode mode)
{
	switch (mode) {
	case POPULATE_DIRECT:
	case POPULATE_IDENTITY:
	case POPULATE_KERNEL:
#ifdef CONFIG_KASAN
	case POPULATE_KASAN_MAP_SHADOW:
#endif
		return true;
	default:
		return false;
	}
}

static unsigned long try_get_large_pud_pa(pud_t *pu_dir, unsigned long addr, unsigned long end,
					  enum populate_mode mode)
{
	unsigned long pa, size = end - addr;

	if (!cpu_has_edat2() || !large_page_mapping_allowed(mode) ||
	    !IS_ALIGNED(addr, PUD_SIZE) || (size < PUD_SIZE))
		return INVALID_PHYS_ADDR;

	pa = resolve_pa_may_alloc(addr, size, mode);
	if (!IS_ALIGNED(pa, PUD_SIZE))
		return INVALID_PHYS_ADDR;

	return pa;
}

static unsigned long try_get_large_pmd_pa(pmd_t *pm_dir, unsigned long addr, unsigned long end,
					  enum populate_mode mode)
{
	unsigned long pa, size = end - addr;

	if (!cpu_has_edat1() || !large_page_mapping_allowed(mode) ||
	    !IS_ALIGNED(addr, PMD_SIZE) || (size < PMD_SIZE))
		return INVALID_PHYS_ADDR;

	pa = resolve_pa_may_alloc(addr, size, mode);
	if (!IS_ALIGNED(pa, PMD_SIZE))
		return INVALID_PHYS_ADDR;

	return pa;
}

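/*
 * The pgtable_*_populate() helpers walk one page table level each and
 * fill not-yet-populated entries for the [addr, end) range: large pud
 * and pmd mappings are used where the mode, the alignment and the
 * available facilities allow it, otherwise a lower-level table is
 * allocated and populated. For POPULATE_KASAN_ZERO_SHADOW, entries are
 * pointed at the shared early shadow tables instead. For the identity
 * mapping the number of created 4K, 1M and 2G mappings is accounted
 * via update_page_count().
 */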
static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long end,
				 enum populate_mode mode)
{
	unsigned long pages = 0;
	pte_t *pte, entry;

	pte = pte_offset_kernel(pmd, addr);
	for (; addr < end; addr += PAGE_SIZE, pte++) {
		if (pte_none(*pte)) {
			if (kasan_pte_populate_zero_shadow(pte, mode))
				continue;
			entry = __pte(resolve_pa_may_alloc(addr, PAGE_SIZE, mode));
			entry = set_pte_bit(entry, PAGE_KERNEL);
			set_pte(pte, entry);
			pages++;
		}
	}
	if (mode == POPULATE_IDENTITY)
		update_page_count(PG_DIRECT_MAP_4K, pages);
}

static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long end,
				 enum populate_mode mode)
{
	unsigned long pa, next, pages = 0;
	pmd_t *pmd, entry;
	pte_t *pte;

	pmd = pmd_offset(pud, addr);
	for (; addr < end; addr = next, pmd++) {
		next = pmd_addr_end(addr, end);
		if (pmd_none(*pmd)) {
			if (kasan_pmd_populate_zero_shadow(pmd, addr, next, mode))
				continue;
			pa = try_get_large_pmd_pa(pmd, addr, next, mode);
			if (pa != INVALID_PHYS_ADDR) {
				entry = __pmd(pa);
				entry = set_pmd_bit(entry, SEGMENT_KERNEL);
				set_pmd(pmd, entry);
				pages++;
				continue;
			}
			pte = boot_pte_alloc();
			pmd_populate(&init_mm, pmd, pte);
		} else if (pmd_leaf(*pmd)) {
			continue;
		}
		pgtable_pte_populate(pmd, addr, next, mode);
	}
	if (mode == POPULATE_IDENTITY)
		update_page_count(PG_DIRECT_MAP_1M, pages);
}

static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long end,
				 enum populate_mode mode)
{
	unsigned long pa, next, pages = 0;
	pud_t *pud, entry;
	pmd_t *pmd;

	pud = pud_offset(p4d, addr);
	for (; addr < end; addr = next, pud++) {
		next = pud_addr_end(addr, end);
		if (pud_none(*pud)) {
			if (kasan_pud_populate_zero_shadow(pud, addr, next, mode))
				continue;
			pa = try_get_large_pud_pa(pud, addr, next, mode);
			if (pa != INVALID_PHYS_ADDR) {
				entry = __pud(pa);
				entry = set_pud_bit(entry, REGION3_KERNEL);
				set_pud(pud, entry);
				pages++;
				continue;
			}
			pmd = boot_crst_alloc(_SEGMENT_ENTRY_EMPTY);
			pud_populate(&init_mm, pud, pmd);
		} else if (pud_leaf(*pud)) {
			continue;
		}
		pgtable_pmd_populate(pud, addr, next, mode);
	}
	if (mode == POPULATE_IDENTITY)
		update_page_count(PG_DIRECT_MAP_2G, pages);
}

static void pgtable_p4d_populate(pgd_t *pgd, unsigned long addr, unsigned long end,
				 enum populate_mode mode)
{
	unsigned long next;
	p4d_t *p4d;
	pud_t *pud;

	p4d = p4d_offset(pgd, addr);
	for (; addr < end; addr = next, p4d++) {
		next = p4d_addr_end(addr, end);
		if (p4d_none(*p4d)) {
			if (kasan_p4d_populate_zero_shadow(p4d, addr, next, mode))
				continue;
			pud = boot_crst_alloc(_REGION3_ENTRY_EMPTY);
			p4d_populate(&init_mm, p4d, pud);
		}
		pgtable_pud_populate(p4d, addr, next, mode);
	}
}

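/*
 * Populate the page tables for the virtual address range [addr, end)
 * according to the given mode, starting at the pgd level. For
 * POPULATE_KASAN_SHALLOW only the top level is populated.
 */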
static void pgtable_populate(unsigned long addr, unsigned long end, enum populate_mode mode)
{
	unsigned long next;
	pgd_t *pgd;
	p4d_t *p4d;

	if (!is_kasan_populate_mode(mode)) {
		boot_debug("%-17s 0x%016lx-0x%016lx -> 0x%016lx-0x%016lx\n",
			   get_populate_mode_name(mode), addr, end,
			   resolve_pa_may_alloc(addr, 0, mode),
			   resolve_pa_may_alloc(end - 1, 0, mode) + 1);
	}

	pgd = pgd_offset(&init_mm, addr);
	for (; addr < end; addr = next, pgd++) {
		next = pgd_addr_end(addr, end);
		if (pgd_none(*pgd)) {
			if (kasan_pgd_populate_zero_shadow(pgd, addr, next, mode))
				continue;
			p4d = boot_crst_alloc(_REGION2_ENTRY_EMPTY);
			pgd_populate(&init_mm, pgd, p4d);
		}
#ifdef CONFIG_KASAN
		if (mode == POPULATE_KASAN_SHALLOW)
			continue;
#endif
		pgtable_p4d_populate(pgd, addr, next, mode);
	}
}

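/*
 * Create the boot page tables: map the (possibly relocated) lowcore with
 * 4KB pages, create the identity mapping for all usable memory, map the
 * kernel image, the AMODE31 area and the absolute lowcore, prepare page
 * tables for the memcpy real area, populate the KASAN shadow, and finally
 * load the kernel ASCE into control registers 1 and 13 and the invalid
 * ASCE into control register 7.
 */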
void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned long asce_limit)
{
	unsigned long lowcore_address = 0;
	unsigned long start, end;
	unsigned long asce_type;
	unsigned long asce_bits;
	pgd_t *init_mm_pgd;
	int i;

	/*
	 * Mark whole memory as no-dat. This must be done before any
	 * page tables are allocated, or kernel image builtin pages
	 * are marked as dat tables.
	 */
	for_each_physmem_online_range(i, &start, &end)
		__arch_set_page_nodat((void *)start, (end - start) >> PAGE_SHIFT);

	/*
	 * init_mm->pgd contains the virtual address of swapper_pg_dir.
	 * It is unusable at this stage since DAT is still off. Swap
	 * it for the physical address of swapper_pg_dir and restore
	 * the virtual address after all page tables are created.
	 */
	init_mm_pgd = init_mm.pgd;
	init_mm.pgd = (pgd_t *)swapper_pg_dir;

	if (asce_limit == _REGION1_SIZE) {
		asce_type = _REGION2_ENTRY_EMPTY;
		asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH;
	} else {
		asce_type = _REGION3_ENTRY_EMPTY;
		asce_bits = _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
	}
	s390_invalid_asce.val = invalid_pg_dir | _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;

	crst_table_init((unsigned long *)swapper_pg_dir, asce_type);
	crst_table_init((unsigned long *)invalid_pg_dir, _REGION3_ENTRY_EMPTY);
	__arch_set_page_dat((void *)swapper_pg_dir, 1UL << CRST_ALLOC_ORDER);
	__arch_set_page_dat((void *)invalid_pg_dir, 1UL << CRST_ALLOC_ORDER);

	if (machine_has_relocated_lowcore())
		lowcore_address = LOWCORE_ALT_ADDRESS;

	/*
	 * To allow prefixing, the lowcore must be mapped with 4KB pages.
	 * To prevent creation of a large page at address 0 first map
	 * the lowcore and create the identity mapping only afterwards.
	 */
	pgtable_populate(lowcore_address,
			 lowcore_address + sizeof(struct lowcore),
			 POPULATE_LOWCORE);
	for_each_physmem_usable_range(i, &start, &end) {
		/* Do not map lowcore with identity mapping */
		if (!start)
			start = sizeof(struct lowcore);
		pgtable_populate((unsigned long)__identity_va(start),
				 (unsigned long)__identity_va(end),
				 POPULATE_IDENTITY);
	}

	/*
	 * [kernel_start..kernel_start + TEXT_OFFSET] region is never
	 * accessed as per the linker script:
	 *
	 *	. = TEXT_OFFSET;
	 *
	 * Therefore, skip mapping TEXT_OFFSET bytes to prevent access to
	 * [__kaslr_offset_phys..__kaslr_offset_phys + TEXT_OFFSET] region.
	 */
	pgtable_populate(kernel_start + TEXT_OFFSET, kernel_end, POPULATE_KERNEL);
	pgtable_populate(AMODE31_START, AMODE31_END, POPULATE_DIRECT);
	pgtable_populate(__abs_lowcore, __abs_lowcore + sizeof(struct lowcore),
			 POPULATE_ABS_LOWCORE);
	pgtable_populate(__memcpy_real_area, __memcpy_real_area + PAGE_SIZE,
			 POPULATE_NONE);
	memcpy_real_ptep = __identity_va(__virt_to_kpte(__memcpy_real_area));

	kasan_populate_shadow(kernel_start, kernel_end);

	get_lowcore()->kernel_asce.val = swapper_pg_dir | asce_bits;
	get_lowcore()->user_asce = s390_invalid_asce;

	local_ctl_load(1, &get_lowcore()->kernel_asce);
	local_ctl_load(7, &get_lowcore()->user_asce);
	local_ctl_load(13, &get_lowcore()->kernel_asce);

	init_mm.context.asce = get_lowcore()->kernel_asce.val;
	init_mm.pgd = init_mm_pgd;
}