// SPDX-License-Identifier: GPL-2.0
#define boot_fmt(fmt) "vmem: " fmt
#include <linux/sched/task.h>
#include <linux/pgtable.h>
#include <linux/kasan.h>
#include <asm/page-states.h>
#include <asm/pgalloc.h>
#include <asm/facility.h>
#include <asm/sections.h>
#include <asm/ctlreg.h>
#include <asm/physmem_info.h>
#include <asm/maccess.h>
#include <asm/abs_lowcore.h>
#include "decompressor.h"
#include "boot.h"

#define INVALID_PHYS_ADDR (~(phys_addr_t)0)
struct ctlreg __bootdata_preserved(s390_invalid_asce);

#ifdef CONFIG_PROC_FS
atomic_long_t __bootdata_preserved(direct_pages_count[PG_DIRECT_MAP_MAX]);
#endif

#define init_mm (*(struct mm_struct *)vmlinux.init_mm_off)
#define swapper_pg_dir vmlinux.swapper_pg_dir_off
#define invalid_pg_dir vmlinux.invalid_pg_dir_off

enum populate_mode {
	POPULATE_NONE,
	POPULATE_DIRECT,
	POPULATE_LOWCORE,
	POPULATE_ABS_LOWCORE,
	POPULATE_IDENTITY,
	POPULATE_KERNEL,
#ifdef CONFIG_KASAN
	/* KASAN modes should be last and grouped together, see is_kasan_populate_mode() */
	POPULATE_KASAN_MAP_SHADOW,
	POPULATE_KASAN_ZERO_SHADOW,
	POPULATE_KASAN_SHALLOW
#endif
};

#define POPULATE_MODE_NAME(t) case POPULATE_ ## t: return #t
static inline const char *get_populate_mode_name(enum populate_mode t)
{
	switch (t) {
	POPULATE_MODE_NAME(NONE);
	POPULATE_MODE_NAME(DIRECT);
	POPULATE_MODE_NAME(LOWCORE);
	POPULATE_MODE_NAME(ABS_LOWCORE);
	POPULATE_MODE_NAME(IDENTITY);
	POPULATE_MODE_NAME(KERNEL);
#ifdef CONFIG_KASAN
	POPULATE_MODE_NAME(KASAN_MAP_SHADOW);
	POPULATE_MODE_NAME(KASAN_ZERO_SHADOW);
	POPULATE_MODE_NAME(KASAN_SHALLOW);
#endif
	default:
		return "UNKNOWN";
	}
}

static bool is_kasan_populate_mode(enum populate_mode mode)
{
#ifdef CONFIG_KASAN
	return mode >= POPULATE_KASAN_MAP_SHADOW;
#else
	return false;
#endif
}

static void pgtable_populate(unsigned long addr, unsigned long end, enum populate_mode mode);

#ifdef CONFIG_KASAN

#define kasan_early_shadow_page vmlinux.kasan_early_shadow_page_off
#define kasan_early_shadow_pte ((pte_t *)vmlinux.kasan_early_shadow_pte_off)
#define kasan_early_shadow_pmd ((pmd_t *)vmlinux.kasan_early_shadow_pmd_off)
#define kasan_early_shadow_pud ((pud_t *)vmlinux.kasan_early_shadow_pud_off)
#define kasan_early_shadow_p4d ((p4d_t *)vmlinux.kasan_early_shadow_p4d_off)
#define __sha(x) ((unsigned long)kasan_mem_to_shadow((void *)x))

static pte_t pte_z;

static inline void kasan_populate(unsigned long start, unsigned long end, enum populate_mode mode)
{
	unsigned long sha_start = PAGE_ALIGN_DOWN(__sha(start));
	unsigned long sha_end = PAGE_ALIGN(__sha(end));

	boot_debug("%-17s 0x%016lx-0x%016lx >> 0x%016lx-0x%016lx\n", get_populate_mode_name(mode),
		   start, end, sha_start, sha_end);
	pgtable_populate(sha_start, sha_end, mode);
}

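/*
 * Set up the boot-time KASAN shadow: map real shadow memory for all usable
 * physical memory ranges and the decompressed kernel image, point memory
 * gaps, the AMODE31 area and other untracked regions at the shared zero
 * shadow, and populate the vmalloc and modules areas only shallowly.
 */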
static void kasan_populate_shadow(unsigned long kernel_start, unsigned long kernel_end)
{
	pmd_t pmd_z = __pmd(__pa(kasan_early_shadow_pte) | _SEGMENT_ENTRY);
	pud_t pud_z = __pud(__pa(kasan_early_shadow_pmd) | _REGION3_ENTRY);
	p4d_t p4d_z = __p4d(__pa(kasan_early_shadow_pud) | _REGION2_ENTRY);
	unsigned long memgap_start = 0;
	unsigned long start, end;
	int i;

	pte_z = __pte(__pa(kasan_early_shadow_page) | pgprot_val(PAGE_KERNEL_RO));
	crst_table_init((unsigned long *)kasan_early_shadow_p4d, p4d_val(p4d_z));
	crst_table_init((unsigned long *)kasan_early_shadow_pud, pud_val(pud_z));
	crst_table_init((unsigned long *)kasan_early_shadow_pmd, pmd_val(pmd_z));
	memset64((u64 *)kasan_early_shadow_pte, pte_val(pte_z), PTRS_PER_PTE);
	__arch_set_page_dat(kasan_early_shadow_p4d, 1UL << CRST_ALLOC_ORDER);
	__arch_set_page_dat(kasan_early_shadow_pud, 1UL << CRST_ALLOC_ORDER);
	__arch_set_page_dat(kasan_early_shadow_pmd, 1UL << CRST_ALLOC_ORDER);
	__arch_set_page_dat(kasan_early_shadow_pte, 1);

	for_each_physmem_usable_range(i, &start, &end) {
		kasan_populate((unsigned long)__identity_va(start),
			       (unsigned long)__identity_va(end),
			       POPULATE_KASAN_MAP_SHADOW);
		if (memgap_start && physmem_info.info_source == MEM_DETECT_DIAG260) {
			kasan_populate((unsigned long)__identity_va(memgap_start),
				       (unsigned long)__identity_va(start),
				       POPULATE_KASAN_ZERO_SHADOW);
		}
		memgap_start = end;
	}
	kasan_populate(kernel_start + TEXT_OFFSET, kernel_end, POPULATE_KASAN_MAP_SHADOW);
	kasan_populate(0, (unsigned long)__identity_va(0), POPULATE_KASAN_ZERO_SHADOW);
	kasan_populate(AMODE31_START, AMODE31_END, POPULATE_KASAN_ZERO_SHADOW);
	/* shallowly populate kasan shadow for vmalloc and modules */
	kasan_populate(VMALLOC_START, MODULES_END, POPULATE_KASAN_SHALLOW);
	/* populate kasan shadow for untracked memory */
	kasan_populate((unsigned long)__identity_va(ident_map_size), VMALLOC_START,
		       POPULATE_KASAN_ZERO_SHADOW);
	kasan_populate(kernel_end, _REGION1_SIZE, POPULATE_KASAN_ZERO_SHADOW);
}

static bool kasan_pgd_populate_zero_shadow(pgd_t *pgd, unsigned long addr,
					   unsigned long end, enum populate_mode mode)
{
	if (mode == POPULATE_KASAN_ZERO_SHADOW &&
	    IS_ALIGNED(addr, PGDIR_SIZE) && end - addr >= PGDIR_SIZE) {
		pgd_populate(&init_mm, pgd, kasan_early_shadow_p4d);
		return true;
	}
	return false;
}

static bool kasan_p4d_populate_zero_shadow(p4d_t *p4d, unsigned long addr,
					   unsigned long end, enum populate_mode mode)
{
	if (mode == POPULATE_KASAN_ZERO_SHADOW &&
	    IS_ALIGNED(addr, P4D_SIZE) && end - addr >= P4D_SIZE) {
		p4d_populate(&init_mm, p4d, kasan_early_shadow_pud);
		return true;
	}
	return false;
}

static bool kasan_pud_populate_zero_shadow(pud_t *pud, unsigned long addr,
					   unsigned long end, enum populate_mode mode)
{
	if (mode == POPULATE_KASAN_ZERO_SHADOW &&
	    IS_ALIGNED(addr, PUD_SIZE) && end - addr >= PUD_SIZE) {
		pud_populate(&init_mm, pud, kasan_early_shadow_pmd);
		return true;
	}
	return false;
}

static bool kasan_pmd_populate_zero_shadow(pmd_t *pmd, unsigned long addr,
					   unsigned long end, enum populate_mode mode)
{
	if (mode == POPULATE_KASAN_ZERO_SHADOW &&
	    IS_ALIGNED(addr, PMD_SIZE) && end - addr >= PMD_SIZE) {
		pmd_populate(&init_mm, pmd, kasan_early_shadow_pte);
		return true;
	}
	return false;
}

static bool kasan_pte_populate_zero_shadow(pte_t *pte, enum populate_mode mode)
{
	if (mode == POPULATE_KASAN_ZERO_SHADOW) {
		set_pte(pte, pte_z);
		return true;
	}
	return false;
}
#else

static inline void kasan_populate_shadow(unsigned long kernel_start, unsigned long kernel_end)
{
}

static inline bool kasan_pgd_populate_zero_shadow(pgd_t *pgd, unsigned long addr,
						  unsigned long end, enum populate_mode mode)
{
	return false;
}

static inline bool kasan_p4d_populate_zero_shadow(p4d_t *p4d, unsigned long addr,
						  unsigned long end, enum populate_mode mode)
{
	return false;
}

static inline bool kasan_pud_populate_zero_shadow(pud_t *pud, unsigned long addr,
						  unsigned long end, enum populate_mode mode)
{
	return false;
}

static inline bool kasan_pmd_populate_zero_shadow(pmd_t *pmd, unsigned long addr,
						  unsigned long end, enum populate_mode mode)
{
	return false;
}

static bool kasan_pte_populate_zero_shadow(pte_t *pte, enum populate_mode mode)
{
	return false;
}

#endif

/*
 * Mimic virt_to_kpte() in the absence of the init_mm symbol. Skip the pmd
 * NULL check though.
 */
static inline pte_t *__virt_to_kpte(unsigned long va)
{
	return pte_offset_kernel(pmd_offset(pud_offset(p4d_offset(pgd_offset_k(va), va), va), va), va);
}

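/*
 * Allocate and initialize a CRST (region or segment) table from the RR_VMEM
 * physmem reserve and mark it as a DAT table.
 */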
static void *boot_crst_alloc(unsigned long val)
{
	unsigned long size = PAGE_SIZE << CRST_ALLOC_ORDER;
	unsigned long *table;

	table = (unsigned long *)physmem_alloc_or_die(RR_VMEM, size, size);
	crst_table_init(table, val);
	__arch_set_page_dat(table, 1UL << CRST_ALLOC_ORDER);
	return table;
}

static pte_t *boot_pte_alloc(void)
{
	static void *pte_leftover;
	pte_t *pte;

	/*
	 * handling pte_leftovers this way helps to avoid memory fragmentation
	 * during POPULATE_KASAN_MAP_SHADOW when EDAT is off
	 */
	if (!pte_leftover) {
		pte_leftover = (void *)physmem_alloc_or_die(RR_VMEM, PAGE_SIZE, PAGE_SIZE);
		pte = pte_leftover + _PAGE_TABLE_SIZE;
		__arch_set_page_dat(pte, 1);
	} else {
		pte = pte_leftover;
		pte_leftover = NULL;
	}

	memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE);
	return pte;
}

static unsigned long resolve_pa_may_alloc(unsigned long addr, unsigned long size,
					  enum populate_mode mode)
{
	switch (mode) {
	case POPULATE_NONE:
		return INVALID_PHYS_ADDR;
	case POPULATE_DIRECT:
		return addr;
	case POPULATE_LOWCORE:
		return __lowcore_pa(addr);
	case POPULATE_ABS_LOWCORE:
		return __abs_lowcore_pa(addr);
	case POPULATE_KERNEL:
		return __kernel_pa(addr);
	case POPULATE_IDENTITY:
		return __identity_pa(addr);
#ifdef CONFIG_KASAN
	case POPULATE_KASAN_MAP_SHADOW:
		/* Allow large page allocations to fail; this will fall back to 1MB/4K pages */
		addr = physmem_alloc(RR_VMEM, size, size, size == PAGE_SIZE);
		if (addr) {
			memset((void *)addr, 0, size);
			return addr;
		}
		return INVALID_PHYS_ADDR;
#endif
	default:
		return INVALID_PHYS_ADDR;
	}
}

static bool large_page_mapping_allowed(enum populate_mode mode)
{
	switch (mode) {
	case POPULATE_DIRECT:
	case POPULATE_IDENTITY:
	case POPULATE_KERNEL:
#ifdef CONFIG_KASAN
	case POPULATE_KASAN_MAP_SHADOW:
#endif
		return true;
	default:
		return false;
	}
}

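/*
 * Try to resolve a physical address that allows mapping [addr, end) with a
 * single 2GB region-third (PUD) entry. Returns INVALID_PHYS_ADDR when EDAT2
 * is unavailable, the populate mode does not allow large mappings, or the
 * address, size or resolved physical address is not suitably aligned; the
 * caller then falls back to smaller mappings. try_get_large_pmd_pa() below
 * is the 1MB segment (EDAT1) counterpart.
 */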
static unsigned long try_get_large_pud_pa(pud_t *pu_dir, unsigned long addr, unsigned long end,
					  enum populate_mode mode)
{
	unsigned long pa, size = end - addr;

	if (!machine.has_edat2 || !large_page_mapping_allowed(mode) ||
	    !IS_ALIGNED(addr, PUD_SIZE) || (size < PUD_SIZE))
		return INVALID_PHYS_ADDR;

	pa = resolve_pa_may_alloc(addr, size, mode);
	if (!IS_ALIGNED(pa, PUD_SIZE))
		return INVALID_PHYS_ADDR;

	return pa;
}

static unsigned long try_get_large_pmd_pa(pmd_t *pm_dir, unsigned long addr, unsigned long end,
					  enum populate_mode mode)
{
	unsigned long pa, size = end - addr;

	if (!machine.has_edat1 || !large_page_mapping_allowed(mode) ||
	    !IS_ALIGNED(addr, PMD_SIZE) || (size < PMD_SIZE))
		return INVALID_PHYS_ADDR;

	pa = resolve_pa_may_alloc(addr, size, mode);
	if (!IS_ALIGNED(pa, PMD_SIZE))
		return INVALID_PHYS_ADDR;

	return pa;
}

static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long end,
				 enum populate_mode mode)
{
	unsigned long pages = 0;
	pte_t *pte, entry;

	pte = pte_offset_kernel(pmd, addr);
	for (; addr < end; addr += PAGE_SIZE, pte++) {
		if (pte_none(*pte)) {
			if (kasan_pte_populate_zero_shadow(pte, mode))
				continue;
			entry = __pte(resolve_pa_may_alloc(addr, PAGE_SIZE, mode));
			entry = set_pte_bit(entry, PAGE_KERNEL);
			set_pte(pte, entry);
			pages++;
		}
	}
	if (mode == POPULATE_IDENTITY)
		update_page_count(PG_DIRECT_MAP_4K, pages);
}

static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long end,
				 enum populate_mode mode)
{
	unsigned long pa, next, pages = 0;
	pmd_t *pmd, entry;
	pte_t *pte;

	pmd = pmd_offset(pud, addr);
	for (; addr < end; addr = next, pmd++) {
		next = pmd_addr_end(addr, end);
		if (pmd_none(*pmd)) {
			if (kasan_pmd_populate_zero_shadow(pmd, addr, next, mode))
				continue;
			pa = try_get_large_pmd_pa(pmd, addr, next, mode);
			if (pa != INVALID_PHYS_ADDR) {
				entry = __pmd(pa);
				entry = set_pmd_bit(entry, SEGMENT_KERNEL);
				set_pmd(pmd, entry);
				pages++;
				continue;
			}
			pte = boot_pte_alloc();
			pmd_populate(&init_mm, pmd, pte);
		} else if (pmd_leaf(*pmd)) {
			continue;
		}
		pgtable_pte_populate(pmd, addr, next, mode);
	}
	if (mode == POPULATE_IDENTITY)
		update_page_count(PG_DIRECT_MAP_1M, pages);
}

static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long end,
				 enum populate_mode mode)
{
	unsigned long pa, next, pages = 0;
	pud_t *pud, entry;
	pmd_t *pmd;

	pud = pud_offset(p4d, addr);
	for (; addr < end; addr = next, pud++) {
		next = pud_addr_end(addr, end);
		if (pud_none(*pud)) {
			if (kasan_pud_populate_zero_shadow(pud, addr, next, mode))
				continue;
			pa = try_get_large_pud_pa(pud, addr, next, mode);
			if (pa != INVALID_PHYS_ADDR) {
				entry = __pud(pa);
				entry = set_pud_bit(entry, REGION3_KERNEL);
				set_pud(pud, entry);
				pages++;
				continue;
			}
			pmd = boot_crst_alloc(_SEGMENT_ENTRY_EMPTY);
			pud_populate(&init_mm, pud, pmd);
		} else if (pud_leaf(*pud)) {
			continue;
		}
		pgtable_pmd_populate(pud, addr, next, mode);
	}
	if (mode == POPULATE_IDENTITY)
		update_page_count(PG_DIRECT_MAP_2G, pages);
}

static void pgtable_p4d_populate(pgd_t *pgd, unsigned long addr, unsigned long end,
				 enum populate_mode mode)
{
	unsigned long next;
	p4d_t *p4d;
	pud_t *pud;

	p4d = p4d_offset(pgd, addr);
	for (; addr < end; addr = next, p4d++) {
		next = p4d_addr_end(addr, end);
		if (p4d_none(*p4d)) {
			if (kasan_p4d_populate_zero_shadow(p4d, addr, next, mode))
				continue;
			pud = boot_crst_alloc(_REGION3_ENTRY_EMPTY);
			p4d_populate(&init_mm, p4d, pud);
		}
		pgtable_pud_populate(p4d, addr, next, mode);
	}
}

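/*
 * Walk the kernel page tables for the virtual range [addr, end) and populate
 * all missing entries according to the given populate mode, allocating
 * intermediate tables as needed.
 */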
static void pgtable_populate(unsigned long addr, unsigned long end, enum populate_mode mode)
{
	unsigned long next;
	pgd_t *pgd;
	p4d_t *p4d;

	if (!is_kasan_populate_mode(mode)) {
		boot_debug("%-17s 0x%016lx-0x%016lx -> 0x%016lx-0x%016lx\n",
			   get_populate_mode_name(mode), addr, end,
			   resolve_pa_may_alloc(addr, 0, mode),
			   resolve_pa_may_alloc(end - 1, 0, mode) + 1);
	}

	pgd = pgd_offset(&init_mm, addr);
	for (; addr < end; addr = next, pgd++) {
		next = pgd_addr_end(addr, end);
		if (pgd_none(*pgd)) {
			if (kasan_pgd_populate_zero_shadow(pgd, addr, next, mode))
				continue;
			p4d = boot_crst_alloc(_REGION2_ENTRY_EMPTY);
			pgd_populate(&init_mm, pgd, p4d);
		}
#ifdef CONFIG_KASAN
		if (mode == POPULATE_KASAN_SHALLOW)
			continue;
#endif
		pgtable_p4d_populate(pgd, addr, next, mode);
	}
}

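/*
 * Create the boot-time kernel page tables: the lowcore, identity and kernel
 * image mappings, the absolute lowcore and memcpy_real areas and the KASAN
 * shadow, then load the resulting ASCE into the control registers.
 */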
void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned long asce_limit)
{
	unsigned long lowcore_address = 0;
	unsigned long start, end;
	unsigned long asce_type;
	unsigned long asce_bits;
	pgd_t *init_mm_pgd;
	int i;

	/*
	 * Mark whole memory as no-dat. This must be done before any
	 * page tables are allocated, or kernel image builtin pages
	 * are marked as dat tables.
	 */
	for_each_physmem_online_range(i, &start, &end)
		__arch_set_page_nodat((void *)start, (end - start) >> PAGE_SHIFT);

	/*
	 * init_mm->pgd contains the virtual address of swapper_pg_dir.
	 * It is unusable at this stage since DAT is still off. Swap
	 * it for the physical address of swapper_pg_dir and restore
	 * the virtual address after all page tables are created.
	 */
	init_mm_pgd = init_mm.pgd;
	init_mm.pgd = (pgd_t *)swapper_pg_dir;

	if (asce_limit == _REGION1_SIZE) {
		asce_type = _REGION2_ENTRY_EMPTY;
		asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH;
	} else {
		asce_type = _REGION3_ENTRY_EMPTY;
		asce_bits = _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
	}
	s390_invalid_asce.val = invalid_pg_dir | _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;

	crst_table_init((unsigned long *)swapper_pg_dir, asce_type);
	crst_table_init((unsigned long *)invalid_pg_dir, _REGION3_ENTRY_EMPTY);
	__arch_set_page_dat((void *)swapper_pg_dir, 1UL << CRST_ALLOC_ORDER);
	__arch_set_page_dat((void *)invalid_pg_dir, 1UL << CRST_ALLOC_ORDER);

	if (relocate_lowcore)
		lowcore_address = LOWCORE_ALT_ADDRESS;

	/*
	 * To allow prefixing, the lowcore must be mapped with 4KB pages.
	 * To prevent creation of a large page at address 0, first map
	 * the lowcore and create the identity mapping only afterwards.
	 */
	pgtable_populate(lowcore_address,
			 lowcore_address + sizeof(struct lowcore),
			 POPULATE_LOWCORE);
	for_each_physmem_usable_range(i, &start, &end) {
		pgtable_populate((unsigned long)__identity_va(start),
				 (unsigned long)__identity_va(end),
				 POPULATE_IDENTITY);
	}

	/*
	 * [kernel_start..kernel_start + TEXT_OFFSET] region is never
	 * accessed as per the linker script:
	 *
	 *	. = TEXT_OFFSET;
	 *
	 * Therefore, skip mapping TEXT_OFFSET bytes to prevent access to
	 * [__kaslr_offset_phys..__kaslr_offset_phys + TEXT_OFFSET] region.
	 */
	pgtable_populate(kernel_start + TEXT_OFFSET, kernel_end, POPULATE_KERNEL);
	pgtable_populate(AMODE31_START, AMODE31_END, POPULATE_DIRECT);
	pgtable_populate(__abs_lowcore, __abs_lowcore + sizeof(struct lowcore),
			 POPULATE_ABS_LOWCORE);
	pgtable_populate(__memcpy_real_area, __memcpy_real_area + PAGE_SIZE,
			 POPULATE_NONE);
	memcpy_real_ptep = __identity_va(__virt_to_kpte(__memcpy_real_area));

	kasan_populate_shadow(kernel_start, kernel_end);

	get_lowcore()->kernel_asce.val = swapper_pg_dir | asce_bits;
	get_lowcore()->user_asce = s390_invalid_asce;

	local_ctl_load(1, &get_lowcore()->kernel_asce);
	local_ctl_load(7, &get_lowcore()->user_asce);
	local_ctl_load(13, &get_lowcore()->kernel_asce);

	init_mm.context.asce = get_lowcore()->kernel_asce.val;
	init_mm.pgd = init_mm_pgd;
}