// SPDX-License-Identifier: GPL-2.0
#include <linux/sched/task.h>
#include <linux/pgtable.h>
#include <linux/kasan.h>
#include <asm/page-states.h>
#include <asm/pgalloc.h>
#include <asm/facility.h>
#include <asm/sections.h>
#include <asm/ctlreg.h>
#include <asm/physmem_info.h>
#include <asm/maccess.h>
#include <asm/abs_lowcore.h>
#include "decompressor.h"
#include "boot.h"

struct ctlreg __bootdata_preserved(s390_invalid_asce);

#ifdef CONFIG_PROC_FS
atomic_long_t __bootdata_preserved(direct_pages_count[PG_DIRECT_MAP_MAX]);
#endif

#define init_mm		(*(struct mm_struct *)vmlinux.init_mm_off)
#define swapper_pg_dir	vmlinux.swapper_pg_dir_off
#define invalid_pg_dir	vmlinux.invalid_pg_dir_off

enum populate_mode {
	POPULATE_NONE,
	POPULATE_DIRECT,
	POPULATE_ABS_LOWCORE,
#ifdef CONFIG_KASAN
	POPULATE_KASAN_MAP_SHADOW,
	POPULATE_KASAN_ZERO_SHADOW,
	POPULATE_KASAN_SHALLOW
#endif
};

static void pgtable_populate(unsigned long addr, unsigned long end, enum populate_mode mode);

#ifdef CONFIG_KASAN

#define kasan_early_shadow_page	vmlinux.kasan_early_shadow_page_off
#define kasan_early_shadow_pte	((pte_t *)vmlinux.kasan_early_shadow_pte_off)
#define kasan_early_shadow_pmd	((pmd_t *)vmlinux.kasan_early_shadow_pmd_off)
#define kasan_early_shadow_pud	((pud_t *)vmlinux.kasan_early_shadow_pud_off)
#define kasan_early_shadow_p4d	((p4d_t *)vmlinux.kasan_early_shadow_p4d_off)
#define __sha(x)		((unsigned long)kasan_mem_to_shadow((void *)x))

static pte_t pte_z;

static inline void kasan_populate(unsigned long start, unsigned long end, enum populate_mode mode)
{
	start = PAGE_ALIGN_DOWN(__sha(start));
	end = PAGE_ALIGN(__sha(end));
	pgtable_populate(start, end, mode);
}

static void kasan_populate_shadow(void)
{
	pmd_t pmd_z = __pmd(__pa(kasan_early_shadow_pte) | _SEGMENT_ENTRY);
	pud_t pud_z = __pud(__pa(kasan_early_shadow_pmd) | _REGION3_ENTRY);
	p4d_t p4d_z = __p4d(__pa(kasan_early_shadow_pud) | _REGION2_ENTRY);
	unsigned long memgap_start = 0;
	unsigned long untracked_end;
	unsigned long start, end;
	int i;

	pte_z = __pte(__pa(kasan_early_shadow_page) | pgprot_val(PAGE_KERNEL_RO));
	if (!machine.has_nx)
		pte_z = clear_pte_bit(pte_z, __pgprot(_PAGE_NOEXEC));
	crst_table_init((unsigned long *)kasan_early_shadow_p4d, p4d_val(p4d_z));
	crst_table_init((unsigned long *)kasan_early_shadow_pud, pud_val(pud_z));
	crst_table_init((unsigned long *)kasan_early_shadow_pmd, pmd_val(pmd_z));
	memset64((u64 *)kasan_early_shadow_pte, pte_val(pte_z), PTRS_PER_PTE);
	__arch_set_page_dat(kasan_early_shadow_p4d, 1UL << CRST_ALLOC_ORDER);
	__arch_set_page_dat(kasan_early_shadow_pud, 1UL << CRST_ALLOC_ORDER);
	__arch_set_page_dat(kasan_early_shadow_pmd, 1UL << CRST_ALLOC_ORDER);
	__arch_set_page_dat(kasan_early_shadow_pte, 1);

	/*
	 * Current memory layout:
	 * +- 0 -------------+         +- shadow start -+
	 * |1:1 ident mapping|        /|1/8 of ident map|
	 * |                 |       / |                |
	 * +-end of ident map+      /  +----------------+
	 * | ... gap ...     |     /   | kasan          |
	 * |                 |    /    | zero page      |
	 * +- vmalloc area  -+   /     | mapping        |
	 * | vmalloc_size    |  /      | (untracked)    |
	 * +- modules vaddr -+ /       +----------------+
	 * | 2Gb             |/        |    unmapped    | allocated per module
	 * +- shadow start  -+         +----------------+
	 * | 1/8 addr space  |         | zero pg mapping| (untracked)
	 * +- shadow end ----+---------+- shadow end ---+
	 *
	 * Current memory layout (KASAN_VMALLOC):
	 * +- 0 -------------+         +- shadow start -+
	 * |1:1 ident mapping|        /|1/8 of ident map|
	 * |                 |       / |                |
	 * +-end of ident map+      /  +----------------+
	 * | ... gap ...     |     /   | kasan zero page| (untracked)
	 * |                 |    /    | mapping        |
	 * +- vmalloc area  -+   /     +----------------+
	 * | vmalloc_size    |  /      |shallow populate|
	 * +- modules vaddr -+ /       +----------------+
	 * | 2Gb             |/        |shallow populate|
	 * +- shadow start  -+         +----------------+
	 * | 1/8 addr space  |         | zero pg mapping| (untracked)
	 * +- shadow end ----+---------+- shadow end ---+
	 */

	for_each_physmem_usable_range(i, &start, &end) {
		kasan_populate(start, end, POPULATE_KASAN_MAP_SHADOW);
		if (memgap_start && physmem_info.info_source == MEM_DETECT_DIAG260)
			kasan_populate(memgap_start, start, POPULATE_KASAN_ZERO_SHADOW);
		memgap_start = end;
	}
	if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) {
		untracked_end = VMALLOC_START;
		/* shallowly populate kasan shadow for vmalloc and modules */
		kasan_populate(VMALLOC_START, MODULES_END, POPULATE_KASAN_SHALLOW);
	} else {
		untracked_end = MODULES_VADDR;
	}
	/* populate kasan shadow for untracked memory */
	kasan_populate(ident_map_size, untracked_end, POPULATE_KASAN_ZERO_SHADOW);
	kasan_populate(MODULES_END, _REGION1_SIZE, POPULATE_KASAN_ZERO_SHADOW);
}

static bool kasan_pgd_populate_zero_shadow(pgd_t *pgd, unsigned long addr,
					   unsigned long end, enum populate_mode mode)
{
	if (mode == POPULATE_KASAN_ZERO_SHADOW &&
	    IS_ALIGNED(addr, PGDIR_SIZE) && end - addr >= PGDIR_SIZE) {
		pgd_populate(&init_mm, pgd, kasan_early_shadow_p4d);
		return true;
	}
	return false;
}

static bool kasan_p4d_populate_zero_shadow(p4d_t *p4d, unsigned long addr,
					   unsigned long end, enum populate_mode mode)
{
	if (mode == POPULATE_KASAN_ZERO_SHADOW &&
	    IS_ALIGNED(addr, P4D_SIZE) && end - addr >= P4D_SIZE) {
		p4d_populate(&init_mm, p4d, kasan_early_shadow_pud);
		return true;
	}
	return false;
}

static bool kasan_pud_populate_zero_shadow(pud_t *pud, unsigned long addr,
					   unsigned long end, enum populate_mode mode)
{
	if (mode == POPULATE_KASAN_ZERO_SHADOW &&
	    IS_ALIGNED(addr, PUD_SIZE) && end - addr >= PUD_SIZE) {
		pud_populate(&init_mm, pud, kasan_early_shadow_pmd);
		return true;
	}
	return false;
}

static bool kasan_pmd_populate_zero_shadow(pmd_t *pmd, unsigned long addr,
					   unsigned long end, enum populate_mode mode)
{
	if (mode == POPULATE_KASAN_ZERO_SHADOW &&
	    IS_ALIGNED(addr, PMD_SIZE) && end - addr >= PMD_SIZE) {
		pmd_populate(&init_mm, pmd, kasan_early_shadow_pte);
		return true;
	}
	return false;
}

static bool kasan_pte_populate_zero_shadow(pte_t *pte, enum populate_mode mode)
{
	if (mode == POPULATE_KASAN_ZERO_SHADOW) {
		set_pte(pte, pte_z);
		return true;
	}
	return false;
}
#else

static inline void kasan_populate_shadow(void) {}

static inline bool kasan_pgd_populate_zero_shadow(pgd_t *pgd, unsigned long addr,
						  unsigned long end, enum populate_mode mode)
{
	return false;
}

static inline bool kasan_p4d_populate_zero_shadow(p4d_t *p4d, unsigned long addr,
						  unsigned long end, enum populate_mode mode)
{
	return false;
}

static inline bool kasan_pud_populate_zero_shadow(pud_t *pud, unsigned long addr,
						  unsigned long end, enum populate_mode mode)
{
	return false;
}

static inline bool kasan_pmd_populate_zero_shadow(pmd_t *pmd, unsigned long addr,
						  unsigned long end, enum populate_mode mode)
{
	return false;
}

static bool kasan_pte_populate_zero_shadow(pte_t *pte, enum populate_mode mode)
{
	return false;
}

#endif

/*
 * Mimic virt_to_kpte() in the absence of the init_mm symbol.
 * Skip the pmd NULL check though.
 */
static inline pte_t *__virt_to_kpte(unsigned long va)
{
	return pte_offset_kernel(pmd_offset(pud_offset(p4d_offset(pgd_offset_k(va), va), va), va), va);
}

/* Allocate a region/segment (crst) table and initialize all entries with @val. */
static void *boot_crst_alloc(unsigned long val)
{
	unsigned long size = PAGE_SIZE << CRST_ALLOC_ORDER;
	unsigned long *table;

	table = (unsigned long *)physmem_alloc_top_down(RR_VMEM, size, size);
	crst_table_init(table, val);
	__arch_set_page_dat(table, 1UL << CRST_ALLOC_ORDER);
	return table;
}

static pte_t *boot_pte_alloc(void)
{
	static void *pte_leftover;
	pte_t *pte;

	/*
	 * Handling pte_leftover this way helps to avoid memory fragmentation
	 * during POPULATE_KASAN_MAP_SHADOW when EDAT is off.
	 */
	if (!pte_leftover) {
		pte_leftover = (void *)physmem_alloc_top_down(RR_VMEM, PAGE_SIZE, PAGE_SIZE);
		pte = pte_leftover + _PAGE_TABLE_SIZE;
		__arch_set_page_dat(pte, 1);
	} else {
		pte = pte_leftover;
		pte_leftover = NULL;
	}

	memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE);
	return pte;
}

/*
 * Return the physical address to be mapped at virtual address @addr for the
 * given populate mode. For the kasan shadow map a fresh, zeroed block of
 * memory is allocated instead; POPULATE_NONE has no backing at all.
 */
static unsigned long _pa(unsigned long addr, unsigned long size, enum populate_mode mode)
{
	switch (mode) {
	case POPULATE_NONE:
		return -1;
	case POPULATE_DIRECT:
		return addr;
	case POPULATE_ABS_LOWCORE:
		return __abs_lowcore_pa(addr);
#ifdef CONFIG_KASAN
	case POPULATE_KASAN_MAP_SHADOW:
		addr = physmem_alloc_top_down(RR_VMEM, size, size);
		memset((void *)addr, 0, size);
		return addr;
#endif
	default:
		return -1;
	}
}

static bool can_large_pud(pud_t *pu_dir, unsigned long addr, unsigned long end)
{
	return machine.has_edat2 &&
	       IS_ALIGNED(addr, PUD_SIZE) && (end - addr) >= PUD_SIZE;
}

static bool can_large_pmd(pmd_t *pm_dir, unsigned long addr, unsigned long end)
{
	return machine.has_edat1 &&
	       IS_ALIGNED(addr, PMD_SIZE) && (end - addr) >= PMD_SIZE;
}

static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long end,
				 enum populate_mode mode)
{
	unsigned long pages = 0;
	pte_t *pte, entry;

	pte = pte_offset_kernel(pmd, addr);
	for (; addr < end; addr += PAGE_SIZE, pte++) {
		if (pte_none(*pte)) {
			if (kasan_pte_populate_zero_shadow(pte, mode))
				continue;
			entry = __pte(_pa(addr, PAGE_SIZE, mode));
			entry = set_pte_bit(entry, PAGE_KERNEL);
			if (!machine.has_nx)
				entry = clear_pte_bit(entry, __pgprot(_PAGE_NOEXEC));
			set_pte(pte, entry);
			pages++;
		}
	}
	if (mode == POPULATE_DIRECT)
		update_page_count(PG_DIRECT_MAP_4K, pages);
}

static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long end,
				 enum populate_mode mode)
{
	unsigned long next, pages = 0;
	pmd_t *pmd, entry;
	pte_t *pte;

	pmd = pmd_offset(pud, addr);
	for (; addr < end; addr = next, pmd++) {
		next = pmd_addr_end(addr, end);
		if (pmd_none(*pmd)) {
			if (kasan_pmd_populate_zero_shadow(pmd, addr, next, mode))
				continue;
			if (can_large_pmd(pmd, addr, next)) {
				entry = __pmd(_pa(addr, _SEGMENT_SIZE, mode));
				entry = set_pmd_bit(entry, SEGMENT_KERNEL);
				if (!machine.has_nx)
					entry = clear_pmd_bit(entry, __pgprot(_SEGMENT_ENTRY_NOEXEC));
				set_pmd(pmd, entry);
				pages++;
				continue;
			}
			pte = boot_pte_alloc();
			pmd_populate(&init_mm, pmd, pte);
		} else if (pmd_large(*pmd)) {
			continue;
		}
		pgtable_pte_populate(pmd, addr, next, mode);
	}
	if (mode == POPULATE_DIRECT)
		update_page_count(PG_DIRECT_MAP_1M, pages);
}

static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long end,
				 enum populate_mode mode)
{
	unsigned long next, pages = 0;
	pud_t *pud, entry;
	pmd_t *pmd;

	pud = pud_offset(p4d, addr);
	for (; addr < end; addr = next, pud++) {
		next = pud_addr_end(addr, end);
		if (pud_none(*pud)) {
			if (kasan_pud_populate_zero_shadow(pud, addr, next, mode))
				continue;
			if (can_large_pud(pud, addr, next)) {
				entry = __pud(_pa(addr, _REGION3_SIZE, mode));
				entry = set_pud_bit(entry, REGION3_KERNEL);
				if (!machine.has_nx)
					entry = clear_pud_bit(entry, __pgprot(_REGION_ENTRY_NOEXEC));
				set_pud(pud, entry);
				pages++;
				continue;
			}
			pmd = boot_crst_alloc(_SEGMENT_ENTRY_EMPTY);
			pud_populate(&init_mm, pud, pmd);
		} else if (pud_large(*pud)) {
			continue;
		}
		pgtable_pmd_populate(pud, addr, next, mode);
	}
	if (mode == POPULATE_DIRECT)
		update_page_count(PG_DIRECT_MAP_2G, pages);
}

static void pgtable_p4d_populate(pgd_t *pgd, unsigned long addr, unsigned long end,
				 enum populate_mode mode)
{
	unsigned long next;
	p4d_t *p4d;
	pud_t *pud;

	p4d = p4d_offset(pgd, addr);
	for (; addr < end; addr = next, p4d++) {
		next = p4d_addr_end(addr, end);
		if (p4d_none(*p4d)) {
			if (kasan_p4d_populate_zero_shadow(p4d, addr, next, mode))
				continue;
			pud = boot_crst_alloc(_REGION3_ENTRY_EMPTY);
			p4d_populate(&init_mm, p4d, pud);
		}
		pgtable_pud_populate(p4d, addr, next, mode);
	}
}

/*
 * Walk the page table hierarchy for the range [addr, end) and create
 * translation entries according to @mode, using large mappings where
 * the hardware allows it.
 */
static void pgtable_populate(unsigned long addr, unsigned long end, enum populate_mode mode)
{
	unsigned long next;
	pgd_t *pgd;
	p4d_t *p4d;

	pgd = pgd_offset(&init_mm, addr);
	for (; addr < end; addr = next, pgd++) {
		next = pgd_addr_end(addr, end);
		if (pgd_none(*pgd)) {
			if (kasan_pgd_populate_zero_shadow(pgd, addr, next, mode))
				continue;
			p4d = boot_crst_alloc(_REGION2_ENTRY_EMPTY);
			pgd_populate(&init_mm, pgd, p4d);
		}
#ifdef CONFIG_KASAN
		if (mode == POPULATE_KASAN_SHALLOW)
			continue;
#endif
		pgtable_p4d_populate(pgd, addr, next, mode);
	}
}

/*
 * Create the boot page tables: the identity mapping, the absolute lowcore
 * mapping, the page tables for the memcpy_real area and the kasan shadow.
 * Finally load the new ASCEs into the control registers.
 */
void setup_vmem(unsigned long asce_limit)
{
	unsigned long start, end;
	unsigned long asce_type;
	unsigned long asce_bits;
	int i;

	/*
	 * Mark the whole memory as no-dat. This must be done before any
	 * page tables are allocated, or the kernel image built-in pages
	 * would be marked as dat tables.
	 */
	for_each_physmem_online_range(i, &start, &end)
		__arch_set_page_nodat((void *)start, (end - start) >> PAGE_SHIFT);

	if (asce_limit == _REGION1_SIZE) {
		asce_type = _REGION2_ENTRY_EMPTY;
		asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH;
	} else {
		asce_type = _REGION3_ENTRY_EMPTY;
		asce_bits = _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
	}
	s390_invalid_asce.val = invalid_pg_dir | _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;

	crst_table_init((unsigned long *)swapper_pg_dir, asce_type);
	crst_table_init((unsigned long *)invalid_pg_dir, _REGION3_ENTRY_EMPTY);
	__arch_set_page_dat((void *)swapper_pg_dir, 1UL << CRST_ALLOC_ORDER);
	__arch_set_page_dat((void *)invalid_pg_dir, 1UL << CRST_ALLOC_ORDER);

	/*
	 * To allow prefixing, the lowcore must be mapped with 4KB pages.
	 * To prevent creation of a large page at address 0, first map
	 * the lowcore and create the identity mapping only afterwards.
	 */
	pgtable_populate(0, sizeof(struct lowcore), POPULATE_DIRECT);
	for_each_physmem_usable_range(i, &start, &end)
		pgtable_populate(start, end, POPULATE_DIRECT);
	pgtable_populate(__abs_lowcore, __abs_lowcore + sizeof(struct lowcore),
			 POPULATE_ABS_LOWCORE);
	pgtable_populate(__memcpy_real_area, __memcpy_real_area + PAGE_SIZE,
			 POPULATE_NONE);
	memcpy_real_ptep = __virt_to_kpte(__memcpy_real_area);

	kasan_populate_shadow();

	S390_lowcore.kernel_asce.val = swapper_pg_dir | asce_bits;
	S390_lowcore.user_asce = s390_invalid_asce;

	local_ctl_load(1, &S390_lowcore.kernel_asce);
	local_ctl_load(7, &S390_lowcore.user_asce);
	local_ctl_load(13, &S390_lowcore.kernel_asce);

	init_mm.context.asce = S390_lowcore.kernel_asce.val;
}