// SPDX-License-Identifier: GPL-2.0
/*
 *  S390 version
 *    Copyright IBM Corp. 1999, 2012
 *    Author(s): Hartmut Penner (hp@de.ibm.com),
 *               Martin Schwidefsky (schwidefsky@de.ibm.com)
 *
 *  Derived from "arch/i386/kernel/setup.c"
 *    Copyright (C) 1995, Linus Torvalds
 */

/*
 * This file handles the architecture-dependent parts of initialization
 */

#define KMSG_COMPONENT "setup"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/errno.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/sched/task.h>
#include <linux/cpu.h>
#include <linux/kernel.h>
#include <linux/memblock.h>
#include <linux/mm.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/ptrace.h>
#include <linux/random.h>
#include <linux/user.h>
#include <linux/tty.h>
#include <linux/ioport.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/initrd.h>
#include <linux/root_dev.h>
#include <linux/console.h>
#include <linux/kernel_stat.h>
#include <linux/dma-map-ops.h>
#include <linux/device.h>
#include <linux/notifier.h>
#include <linux/pfn.h>
#include <linux/ctype.h>
#include <linux/reboot.h>
#include <linux/topology.h>
#include <linux/kexec.h>
#include <linux/crash_dump.h>
#include <linux/memory.h>
#include <linux/compat.h>
#include <linux/start_kernel.h>
#include <linux/hugetlb.h>

#include <asm/boot_data.h>
#include <asm/ipl.h>
#include <asm/facility.h>
#include <asm/smp.h>
#include <asm/mmu_context.h>
#include <asm/cpcmd.h>
#include <asm/lowcore.h>
#include <asm/nmi.h>
#include <asm/irq.h>
#include <asm/page.h>
#include <asm/ptrace.h>
#include <asm/sections.h>
#include <asm/ebcdic.h>
#include <asm/diag.h>
#include <asm/os_info.h>
#include <asm/sclp.h>
#include <asm/stacktrace.h>
#include <asm/sysinfo.h>
#include <asm/numa.h>
#include <asm/alternative.h>
#include <asm/nospec-branch.h>
#include <asm/mem_detect.h>
#include <asm/uv.h>
#include <asm/asm-offsets.h>
#include "entry.h"

/*
 * Machine setup..
 */
unsigned int console_mode = 0;
EXPORT_SYMBOL(console_mode);

unsigned int console_devno = -1;
EXPORT_SYMBOL(console_devno);

unsigned int console_irq = -1;
EXPORT_SYMBOL(console_irq);

/*
 * Some code and data needs to stay below 2 GB, even when the kernel would be
 * relocated above 2 GB, because it has to use 31 bit addresses.
 * Such code and data is part of the .amode31 section.
 */
unsigned long __amode31_ref __samode31 = __pa(&_samode31);
unsigned long __amode31_ref __eamode31 = __pa(&_eamode31);
unsigned long __amode31_ref __stext_amode31 = __pa(&_stext_amode31);
unsigned long __amode31_ref __etext_amode31 = __pa(&_etext_amode31);
struct exception_table_entry __amode31_ref *__start_amode31_ex_table = _start_amode31_ex_table;
struct exception_table_entry __amode31_ref *__stop_amode31_ex_table = _stop_amode31_ex_table;

/*
 * Control registers CR2, CR5 and CR15 are initialized with addresses
 * of tables that must be placed below 2G which is handled by the AMODE31
 * sections.
 * Because the AMODE31 sections are relocated below 2G at startup,
 * the content of control registers CR2, CR5 and CR15 must be updated
 * with new addresses after the relocation.
 * The initial initialization of control registers occurs in head64.S and
 * then gets updated again after AMODE31 relocation. We must access the
 * relevant AMODE31 tables indirectly via pointers placed in the
 * .amode31.refs linker section. Those pointers get updated automatically
 * during AMODE31 relocation and always contain a valid address within the
 * AMODE31 sections.
 */

static __amode31_data u32 __ctl_duct_amode31[16] __aligned(64);

static __amode31_data u64 __ctl_aste_amode31[8] __aligned(64) = {
	[1] = 0xffffffffffffffff
};

static __amode31_data u32 __ctl_duald_amode31[32] __aligned(128) = {
	0x80000000, 0, 0, 0,
	0x80000000, 0, 0, 0,
	0x80000000, 0, 0, 0,
	0x80000000, 0, 0, 0,
	0x80000000, 0, 0, 0,
	0x80000000, 0, 0, 0,
	0x80000000, 0, 0, 0,
	0x80000000, 0, 0, 0
};

static __amode31_data u32 __ctl_linkage_stack_amode31[8] __aligned(64) = {
	0, 0, 0x89000000, 0,
	0, 0, 0x8a000000, 0
};

static u64 __amode31_ref *__ctl_aste = __ctl_aste_amode31;
static u32 __amode31_ref *__ctl_duald = __ctl_duald_amode31;
static u32 __amode31_ref *__ctl_linkage_stack = __ctl_linkage_stack_amode31;
static u32 __amode31_ref *__ctl_duct = __ctl_duct_amode31;

int __bootdata(noexec_disabled);
unsigned long __bootdata(ident_map_size);
struct mem_detect_info __bootdata(mem_detect);
struct initrd_data __bootdata(initrd_data);

unsigned long __bootdata_preserved(__kaslr_offset);
unsigned int __bootdata_preserved(zlib_dfltcc_support);
EXPORT_SYMBOL(zlib_dfltcc_support);
u64 __bootdata_preserved(stfle_fac_list[16]);
EXPORT_SYMBOL(stfle_fac_list);
u64 __bootdata_preserved(alt_stfle_fac_list[16]);
struct oldmem_data __bootdata_preserved(oldmem_data);

unsigned long VMALLOC_START;
EXPORT_SYMBOL(VMALLOC_START);

unsigned long VMALLOC_END;
EXPORT_SYMBOL(VMALLOC_END);

struct page *vmemmap;
EXPORT_SYMBOL(vmemmap);
unsigned long vmemmap_size;

unsigned long MODULES_VADDR;
unsigned long MODULES_END;

/* An array with a pointer to the lowcore of every CPU. */
struct lowcore *lowcore_ptr[NR_CPUS];
EXPORT_SYMBOL(lowcore_ptr);

/*
 * The Write Back bit position in the physaddr is given by the SLPC PCI.
 * Leaving the mask zero always uses write through which is safe.
 */
unsigned long mio_wb_bit_mask __ro_after_init;

/*
 * This is set up by the setup-routine at boot-time.
 * For S390 we need to find out what we have to set up
 * using address 0x10400 ...
 */

#include <asm/setup.h>

/*
 * condev= and conmode= setup parameter.
 */

static int __init condev_setup(char *str)
{
	int vdev;

	vdev = simple_strtoul(str, &str, 0);
	if (vdev >= 0 && vdev < 65536) {
		console_devno = vdev;
		console_irq = -1;
	}
	return 1;
}

__setup("condev=", condev_setup);

static void __init set_preferred_console(void)
{
	if (CONSOLE_IS_3215 || CONSOLE_IS_SCLP)
		add_preferred_console("ttyS", 0, NULL);
	else if (CONSOLE_IS_3270)
		add_preferred_console("tty3270", 0, NULL);
	else if (CONSOLE_IS_VT220)
		add_preferred_console("ttysclp", 0, NULL);
	else if (CONSOLE_IS_HVC)
		add_preferred_console("hvc", 0, NULL);
}

static int __init conmode_setup(char *str)
{
#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
	if (!strcmp(str, "hwc") || !strcmp(str, "sclp"))
		SET_CONSOLE_SCLP;
#endif
#if defined(CONFIG_TN3215_CONSOLE)
	if (!strcmp(str, "3215"))
		SET_CONSOLE_3215;
#endif
#if defined(CONFIG_TN3270_CONSOLE)
	if (!strcmp(str, "3270"))
		SET_CONSOLE_3270;
#endif
	set_preferred_console();
	return 1;
}

__setup("conmode=", conmode_setup);

static void __init conmode_default(void)
{
	char query_buffer[1024];
	char *ptr;

	if (MACHINE_IS_VM) {
		cpcmd("QUERY CONSOLE", query_buffer, 1024, NULL);
		console_devno = simple_strtoul(query_buffer + 5, NULL, 16);
		ptr = strstr(query_buffer, "SUBCHANNEL =");
		console_irq = simple_strtoul(ptr + 13, NULL, 16);
		cpcmd("QUERY TERM", query_buffer, 1024, NULL);
		ptr = strstr(query_buffer, "CONMODE");
		/*
		 * Set the conmode to 3215 so that the device recognition
		 * will set the cu_type of the console to 3215. If the
		 * conmode is 3270 and we don't set it back then both
		 * 3215 and the 3270 driver will try to access the console
		 * device (3215 as console and 3270 as normal tty).
		 */
		cpcmd("TERM CONMODE 3215", NULL, 0, NULL);
		if (ptr == NULL) {
#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
			SET_CONSOLE_SCLP;
#endif
			return;
		}
		if (str_has_prefix(ptr + 8, "3270")) {
#if defined(CONFIG_TN3270_CONSOLE)
			SET_CONSOLE_3270;
#elif defined(CONFIG_TN3215_CONSOLE)
			SET_CONSOLE_3215;
#elif defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
			SET_CONSOLE_SCLP;
#endif
		} else if (str_has_prefix(ptr + 8, "3215")) {
#if defined(CONFIG_TN3215_CONSOLE)
			SET_CONSOLE_3215;
#elif defined(CONFIG_TN3270_CONSOLE)
			SET_CONSOLE_3270;
#elif defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
			SET_CONSOLE_SCLP;
#endif
		}
	} else if (MACHINE_IS_KVM) {
		if (sclp.has_vt220 && IS_ENABLED(CONFIG_SCLP_VT220_CONSOLE))
			SET_CONSOLE_VT220;
		else if (sclp.has_linemode && IS_ENABLED(CONFIG_SCLP_CONSOLE))
			SET_CONSOLE_SCLP;
		else
			SET_CONSOLE_HVC;
	} else {
#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
		SET_CONSOLE_SCLP;
#endif
	}
}

#ifdef CONFIG_CRASH_DUMP
static void __init setup_zfcpdump(void)
{
	if (!is_ipl_type_dump())
		return;
	if (oldmem_data.start)
		return;
	strcat(boot_command_line, " cio_ignore=all,!ipldev,!condev");
	console_loglevel = 2;
}
#else
static inline void setup_zfcpdump(void) {}
#endif /* CONFIG_CRASH_DUMP */

/*
 * Reboot, halt and power_off stubs. They just call _machine_restart,
 * _machine_halt or _machine_power_off.
 */

void machine_restart(char *command)
{
	if ((!in_interrupt() && !in_atomic()) || oops_in_progress)
		/*
		 * Only unblank the console if we are called in enabled
		 * context or a bust_spinlocks cleared the way for us.
		 */
		console_unblank();
	_machine_restart(command);
}

void machine_halt(void)
{
	if (!in_interrupt() || oops_in_progress)
		/*
		 * Only unblank the console if we are called in enabled
		 * context or a bust_spinlocks cleared the way for us.
		 */
		console_unblank();
	_machine_halt();
}

void machine_power_off(void)
{
	if (!in_interrupt() || oops_in_progress)
		/*
		 * Only unblank the console if we are called in enabled
		 * context or a bust_spinlocks cleared the way for us.
		 */
		console_unblank();
	_machine_power_off();
}

/*
 * Dummy power off function.
 */
void (*pm_power_off)(void) = machine_power_off;
EXPORT_SYMBOL_GPL(pm_power_off);

void *restart_stack;

unsigned long stack_alloc(void)
{
#ifdef CONFIG_VMAP_STACK
	return (unsigned long)__vmalloc_node(THREAD_SIZE, THREAD_SIZE,
			THREADINFO_GFP, NUMA_NO_NODE,
			__builtin_return_address(0));
#else
	return __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER);
#endif
}

void stack_free(unsigned long stack)
{
#ifdef CONFIG_VMAP_STACK
	vfree((void *) stack);
#else
	free_pages(stack, THREAD_SIZE_ORDER);
#endif
}

int __init arch_early_irq_init(void)
{
	unsigned long stack;

	stack = __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER);
	if (!stack)
		panic("Couldn't allocate async stack");
	S390_lowcore.async_stack = stack + STACK_INIT_OFFSET;
	return 0;
}

void __init arch_call_rest_init(void)
{
	unsigned long stack;

	stack = stack_alloc();
	if (!stack)
		panic("Couldn't allocate kernel stack");
	current->stack = (void *) stack;
#ifdef CONFIG_VMAP_STACK
	current->stack_vm_area = (void *) stack;
#endif
	set_task_stack_end_magic(current);
	stack += STACK_INIT_OFFSET;
	S390_lowcore.kernel_stack = stack;
	call_on_stack_noreturn(rest_init, stack);
}

static void __init setup_lowcore_dat_off(void)
{
	unsigned long int_psw_mask = PSW_KERNEL_BITS;
	unsigned long mcck_stack;
	struct lowcore *lc;

	if (IS_ENABLED(CONFIG_KASAN))
		int_psw_mask |= PSW_MASK_DAT;

	/*
	 * Setup lowcore for boot cpu
	 */
	BUILD_BUG_ON(sizeof(struct lowcore) != LC_PAGES * PAGE_SIZE);
	lc = memblock_alloc_low(sizeof(*lc), sizeof(*lc));
	if (!lc)
		panic("%s: Failed to allocate %zu bytes align=%zx\n",
		      __func__, sizeof(*lc), sizeof(*lc));

	lc->restart_psw.mask = PSW_KERNEL_BITS;
	lc->restart_psw.addr = (unsigned long) restart_int_handler;
	lc->external_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK;
	lc->external_new_psw.addr = (unsigned long) ext_int_handler;
	lc->svc_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK;
	lc->svc_new_psw.addr = (unsigned long) system_call;
	lc->program_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK;
	lc->program_new_psw.addr = (unsigned long) pgm_check_handler;
	lc->mcck_new_psw.mask = PSW_KERNEL_BITS;
	lc->mcck_new_psw.addr = (unsigned long) mcck_int_handler;
	lc->io_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK;
	lc->io_new_psw.addr = (unsigned long) io_int_handler;
	lc->clock_comparator = clock_comparator_max;
	lc->nodat_stack = ((unsigned long) &init_thread_union)
		+ THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
	lc->current_task = (unsigned long)&init_task;
	lc->lpp = LPP_MAGIC;
	lc->machine_flags = S390_lowcore.machine_flags;
	lc->preempt_count = S390_lowcore.preempt_count;
	nmi_alloc_boot_cpu(lc);
	lc->sys_enter_timer = S390_lowcore.sys_enter_timer;
	lc->exit_timer = S390_lowcore.exit_timer;
	lc->user_timer = S390_lowcore.user_timer;
	lc->system_timer = S390_lowcore.system_timer;
	lc->steal_timer = S390_lowcore.steal_timer;
	lc->last_update_timer = S390_lowcore.last_update_timer;
	lc->last_update_clock = S390_lowcore.last_update_clock;

	/*
	 * Allocate the global restart stack which is the same for
	 * all CPUs in case *one* of them does a PSW restart.
	 */
	restart_stack = memblock_alloc(THREAD_SIZE, THREAD_SIZE);
	if (!restart_stack)
		panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
		      __func__, THREAD_SIZE, THREAD_SIZE);
	restart_stack += STACK_INIT_OFFSET;

	/*
	 * Set up PSW restart to call ipl.c:do_restart(). Copy the relevant
	 * restart data to the absolute zero lowcore. This is necessary if
	 * PSW restart is done on an offline CPU that has lowcore zero.
	 */
	lc->restart_stack = (unsigned long) restart_stack;
	lc->restart_fn = (unsigned long) do_restart;
	lc->restart_data = 0;
	lc->restart_source = -1U;

	mcck_stack = (unsigned long)memblock_alloc(THREAD_SIZE, THREAD_SIZE);
	if (!mcck_stack)
		panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
		      __func__, THREAD_SIZE, THREAD_SIZE);
	lc->mcck_stack = mcck_stack + STACK_INIT_OFFSET;

	/* Setup absolute zero lowcore */
	mem_assign_absolute(S390_lowcore.restart_stack, lc->restart_stack);
	mem_assign_absolute(S390_lowcore.restart_fn, lc->restart_fn);
	mem_assign_absolute(S390_lowcore.restart_data, lc->restart_data);
	mem_assign_absolute(S390_lowcore.restart_source, lc->restart_source);
	mem_assign_absolute(S390_lowcore.restart_psw, lc->restart_psw);

	lc->spinlock_lockval = arch_spin_lockval(0);
	lc->spinlock_index = 0;
	arch_spin_lock_setup(0);
	lc->br_r1_trampoline = 0x07f1;	/* br %r1 */
	lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW);
	lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW);
	lc->preempt_count = PREEMPT_DISABLED;

	set_prefix((u32)(unsigned long) lc);
	lowcore_ptr[0] = lc;
}

static void __init setup_lowcore_dat_on(void)
{
	struct lowcore *lc = lowcore_ptr[0];

	__ctl_clear_bit(0, 28);
	S390_lowcore.external_new_psw.mask |= PSW_MASK_DAT;
	S390_lowcore.svc_new_psw.mask |= PSW_MASK_DAT;
	S390_lowcore.program_new_psw.mask |= PSW_MASK_DAT;
	S390_lowcore.io_new_psw.mask |= PSW_MASK_DAT;
	__ctl_store(S390_lowcore.cregs_save_area, 0, 15);
	__ctl_set_bit(0, 28);
	mem_assign_absolute(S390_lowcore.restart_flags, RESTART_FLAG_CTLREGS);
	mem_assign_absolute(S390_lowcore.program_new_psw, lc->program_new_psw);
	memcpy_absolute(&S390_lowcore.cregs_save_area, lc->cregs_save_area,
			sizeof(S390_lowcore.cregs_save_area));
}

static struct resource code_resource = {
	.name = "Kernel code",
	.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
};

static struct resource data_resource = {
	.name = "Kernel data",
	.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
};

static struct resource bss_resource = {
	.name = "Kernel bss",
	.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
};

static struct resource __initdata *standard_resources[] = {
	&code_resource,
	&data_resource,
	&bss_resource,
};

static void __init setup_resources(void)
{
	struct resource *res, *std_res, *sub_res;
	phys_addr_t start, end;
	int j;
	u64 i;

	code_resource.start = (unsigned long) _text;
	code_resource.end = (unsigned long) _etext - 1;
	data_resource.start = (unsigned long) _etext;
	data_resource.end = (unsigned long) _edata - 1;
	bss_resource.start = (unsigned long) __bss_start;
	bss_resource.end = (unsigned long) __bss_stop - 1;

	for_each_mem_range(i, &start, &end) {
		res = memblock_alloc(sizeof(*res), 8);
		if (!res)
			panic("%s: Failed to allocate %zu bytes align=0x%x\n",
			      __func__, sizeof(*res), 8);
		res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;

		res->name = "System RAM";
		res->start = start;
		/*
		 * In memblock, end points to the first byte after the
		 * range while in resources, end points to the last byte in
		 * the range.
		 */
		res->end = end - 1;
		request_resource(&iomem_resource, res);

		for (j = 0; j < ARRAY_SIZE(standard_resources); j++) {
			std_res = standard_resources[j];
			if (std_res->start < res->start ||
			    std_res->start > res->end)
				continue;
			if (std_res->end > res->end) {
				sub_res = memblock_alloc(sizeof(*sub_res), 8);
				if (!sub_res)
					panic("%s: Failed to allocate %zu bytes align=0x%x\n",
					      __func__, sizeof(*sub_res), 8);
				*sub_res = *std_res;
				sub_res->end = res->end;
				std_res->start = res->end + 1;
				request_resource(res, sub_res);
			} else {
				request_resource(res, std_res);
			}
		}
	}
#ifdef CONFIG_CRASH_DUMP
	/*
	 * Re-add removed crash kernel memory as reserved memory. This makes
	 * sure it will be mapped with the identity mapping and struct pages
	 * will be created, so it can be resized later on.
	 * However add it later since the crash kernel resource should not be
	 * part of the System RAM resource.
	 */
	if (crashk_res.end) {
		memblock_add_node(crashk_res.start, resource_size(&crashk_res), 0);
		memblock_reserve(crashk_res.start, resource_size(&crashk_res));
		insert_resource(&iomem_resource, &crashk_res);
	}
#endif
}

static void __init setup_memory_end(void)
{
	memblock_remove(ident_map_size, ULONG_MAX);
	max_pfn = max_low_pfn = PFN_DOWN(ident_map_size);
	pr_notice("The maximum memory size is %luMB\n", ident_map_size >> 20);
}

#ifdef CONFIG_CRASH_DUMP

/*
 * When kdump is enabled, we have to ensure that no memory from the area
 * [0 - crashkernel memory size] is set offline - it will be exchanged with
 * the crashkernel memory region when kdump is triggered. The crashkernel
 * memory region can never get offlined (pages are unmovable).
 */
static int kdump_mem_notifier(struct notifier_block *nb,
			      unsigned long action, void *data)
{
	struct memory_notify *arg = data;

	if (action != MEM_GOING_OFFLINE)
		return NOTIFY_OK;
	if (arg->start_pfn < PFN_DOWN(resource_size(&crashk_res)))
		return NOTIFY_BAD;
	return NOTIFY_OK;
}

static struct notifier_block kdump_mem_nb = {
	.notifier_call = kdump_mem_notifier,
};

#endif

/*
 * Make sure that the area above identity mapping is protected
 */
static void __init reserve_above_ident_map(void)
{
	memblock_reserve(ident_map_size, ULONG_MAX);
}

/*
 * Reserve memory for kdump kernel to be loaded with kexec
 */
static void __init reserve_crashkernel(void)
{
#ifdef CONFIG_CRASH_DUMP
	unsigned long long crash_base, crash_size;
	phys_addr_t low, high;
	int rc;

	rc = parse_crashkernel(boot_command_line, ident_map_size, &crash_size,
			       &crash_base);

	crash_base = ALIGN(crash_base, KEXEC_CRASH_MEM_ALIGN);
	crash_size = ALIGN(crash_size, KEXEC_CRASH_MEM_ALIGN);
	if (rc || crash_size == 0)
		return;

	if (memblock.memory.regions[0].size < crash_size) {
		pr_info("crashkernel reservation failed: %s\n",
			"first memory chunk must be at least crashkernel size");
		return;
	}

	low = crash_base ?: oldmem_data.start;
	high = low + crash_size;
	if (low >= oldmem_data.start && high <= oldmem_data.start + oldmem_data.size) {
		/* The crashkernel fits into OLDMEM, reuse OLDMEM */
		crash_base = low;
	} else {
		/* Find suitable area in free memory */
		low = max_t(unsigned long, crash_size, sclp.hsa_size);
		high = crash_base ? crash_base + crash_size : ULONG_MAX;

		if (crash_base && crash_base < low) {
			pr_info("crashkernel reservation failed: %s\n",
				"crash_base too low");
			return;
		}
		low = crash_base ?: low;
		crash_base = memblock_find_in_range(low, high, crash_size,
						    KEXEC_CRASH_MEM_ALIGN);
	}

	if (!crash_base) {
		pr_info("crashkernel reservation failed: %s\n",
			"no suitable area found");
		return;
	}

	if (register_memory_notifier(&kdump_mem_nb))
		return;

	if (!oldmem_data.start && MACHINE_IS_VM)
		diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size));
	crashk_res.start = crash_base;
	crashk_res.end = crash_base + crash_size - 1;
	memblock_remove(crash_base, crash_size);
	pr_info("Reserving %lluMB of memory at %lluMB "
		"for crashkernel (System RAM: %luMB)\n",
		crash_size >> 20, crash_base >> 20,
		(unsigned long)memblock.memory.total_size >> 20);
	os_info_crashkernel_add(crash_base, crash_size);
#endif
}

/*
 * Reserve the initrd from being used by memblock
 */
static void __init reserve_initrd(void)
{
#ifdef CONFIG_BLK_DEV_INITRD
	if (!initrd_data.start || !initrd_data.size)
		return;
	initrd_start = initrd_data.start;
	initrd_end = initrd_start + initrd_data.size;
	memblock_reserve(initrd_data.start, initrd_data.size);
#endif
}

/*
 * Reserve the memory area used to pass the certificate lists
 */
static void __init reserve_certificate_list(void)
{
	if (ipl_cert_list_addr)
		memblock_reserve(ipl_cert_list_addr, ipl_cert_list_size);
}

static void __init reserve_mem_detect_info(void)
{
	unsigned long start, size;

	get_mem_detect_reserved(&start, &size);
	if (size)
		memblock_reserve(start, size);
}

static void __init free_mem_detect_info(void)
{
	unsigned long start, size;

	get_mem_detect_reserved(&start, &size);
	if (size)
		memblock_free(start, size);
}

static const char * __init get_mem_info_source(void)
{
	switch (mem_detect.info_source) {
	case MEM_DETECT_SCLP_STOR_INFO:
		return "sclp storage info";
	case MEM_DETECT_DIAG260:
		return "diag260";
	case MEM_DETECT_SCLP_READ_INFO:
		return "sclp read info";
	case MEM_DETECT_BIN_SEARCH:
		return "binary search";
	}
	return "none";
}

static void __init memblock_add_mem_detect_info(void)
{
	unsigned long start, end;
	int i;

	pr_debug("physmem info source: %s (%hhd)\n",
		 get_mem_info_source(), mem_detect.info_source);
	/* keep memblock lists close to the kernel */
	memblock_set_bottom_up(true);
	for_each_mem_detect_block(i, &start, &end) {
		memblock_add(start, end - start);
		memblock_physmem_add(start, end - start);
	}
	memblock_set_bottom_up(false);
	memblock_set_node(0, ULONG_MAX, &memblock.memory, 0);
	memblock_dump_all();
}

/*
 * Check for initrd being in usable memory
 */
static void __init check_initrd(void)
{
#ifdef CONFIG_BLK_DEV_INITRD
	if (initrd_data.start && initrd_data.size &&
	    !memblock_is_region_memory(initrd_data.start, initrd_data.size)) {
		pr_err("The initial RAM disk does not fit into the memory\n");
		memblock_free(initrd_data.start, initrd_data.size);
		initrd_start = initrd_end = 0;
	}
#endif
}

/*
 * Reserve memory used for lowcore/command line/kernel image.
 */
static void __init reserve_kernel(void)
{
	unsigned long start_pfn = PFN_UP(__pa(_end));

	memblock_reserve(0, STARTUP_NORMAL_OFFSET);
	memblock_reserve((unsigned long)sclp_early_sccb, EXT_SCCB_READ_SCP);
	memblock_reserve((unsigned long)_stext, PFN_PHYS(start_pfn)
			 - (unsigned long)_stext);
}

static void __init setup_memory(void)
{
	phys_addr_t start, end;
	u64 i;

	/*
	 * Init storage key for present memory
	 */
	for_each_mem_range(i, &start, &end)
		storage_key_init_range(start, end);

	psw_set_key(PAGE_DEFAULT_KEY);

	/* Only cosmetics */
	memblock_enforce_memory_limit(memblock_end_of_DRAM());
}

static void __init relocate_amode31_section(void)
{
	unsigned long amode31_addr, amode31_size;
	long amode31_offset;
	long *ptr;

	/* Allocate a new AMODE31 capable memory region */
	amode31_size = __eamode31 - __samode31;
	pr_info("Relocating AMODE31 section of size 0x%08lx\n", amode31_size);
	amode31_addr = (unsigned long)memblock_alloc_low(amode31_size, PAGE_SIZE);
	if (!amode31_addr)
		panic("Failed to allocate memory for AMODE31 section\n");
	amode31_offset = amode31_addr - __samode31;

	/* Move original AMODE31 section to the new one */
	memmove((void *)amode31_addr, (void *)__samode31, amode31_size);
	/* Zero out the old AMODE31 section to catch invalid accesses within it */
	memset((void *)__samode31, 0, amode31_size);

	/* Update all AMODE31 region references */
	for (ptr = _start_amode31_refs; ptr != _end_amode31_refs; ptr++)
		*ptr += amode31_offset;
}

/* This must be called after AMODE31 relocation */
static void __init setup_cr(void)
{
	union ctlreg2 cr2;
	union ctlreg5 cr5;
	union ctlreg15 cr15;

	__ctl_duct[1] = (unsigned long)__ctl_aste;
	__ctl_duct[2] = (unsigned long)__ctl_aste;
	__ctl_duct[4] = (unsigned long)__ctl_duald;

	/* Update control registers CR2, CR5 and CR15 */
	__ctl_store(cr2.val, 2, 2);
	__ctl_store(cr5.val, 5, 5);
	__ctl_store(cr15.val, 15, 15);
	cr2.ducto = (unsigned long)__ctl_duct >> 6;
	cr5.pasteo = (unsigned long)__ctl_duct >> 6;
	cr15.lsea = (unsigned long)__ctl_linkage_stack >> 3;
	__ctl_load(cr2.val, 2, 2);
	__ctl_load(cr5.val, 5, 5);
	__ctl_load(cr15.val, 15, 15);
}

/*
 * Add system information as device randomness
 */
static void __init setup_randomness(void)
{
	struct sysinfo_3_2_2 *vmms;

	vmms = (struct sysinfo_3_2_2 *) memblock_phys_alloc(PAGE_SIZE,
							    PAGE_SIZE);
	if (!vmms)
		panic("Failed to allocate memory for sysinfo structure\n");

	if (stsi(vmms, 3, 2, 2) == 0 && vmms->count)
		add_device_randomness(&vmms->vm, sizeof(vmms->vm[0]) * vmms->count);
	memblock_free((unsigned long) vmms, PAGE_SIZE);
}

/*
 * Find the correct size for the task_struct. This depends on
 * the size of the struct fpu at the end of the thread_struct
 * which is embedded in the task_struct.
 */
static void __init setup_task_size(void)
{
	int task_size = sizeof(struct task_struct);

	if (!MACHINE_HAS_VX) {
		task_size -= sizeof(__vector128) * __NUM_VXRS;
		task_size += sizeof(freg_t) * __NUM_FPRS;
	}
	arch_task_struct_size = task_size;
}

/*
 * Issue diagnose 318 to set the control program name and
 * version codes.
 */
static void __init setup_control_program_code(void)
{
	union diag318_info diag318_info = {
		.cpnc = CPNC_LINUX,
		.cpvc = 0,
	};

	if (!sclp.has_diag318)
		return;

	diag_stat_inc(DIAG_STAT_X318);
	asm volatile("diag %0,0,0x318\n" : : "d" (diag318_info.val));
}

/*
 * Print the component list from the IPL report
 */
static void __init log_component_list(void)
{
	struct ipl_rb_component_entry *ptr, *end;
	char *str;

	if (!early_ipl_comp_list_addr)
		return;
	if (ipl_block.hdr.flags & IPL_PL_FLAG_SIPL)
		pr_info("Linux is running with Secure-IPL enabled\n");
	else
		pr_info("Linux is running with Secure-IPL disabled\n");
	ptr = (void *) early_ipl_comp_list_addr;
	end = (void *) ptr + early_ipl_comp_list_size;
	pr_info("The IPL report contains the following components:\n");
	while (ptr < end) {
		if (ptr->flags & IPL_RB_COMPONENT_FLAG_SIGNED) {
			if (ptr->flags & IPL_RB_COMPONENT_FLAG_VERIFIED)
				str = "signed, verified";
			else
				str = "signed, verification failed";
		} else {
			str = "not signed";
		}
		pr_info("%016llx - %016llx (%s)\n",
			ptr->addr, ptr->addr + ptr->len, str);
		ptr++;
	}
}

/*
 * Setup function called from init/main.c just after the banner
 * was printed.
 */

void __init setup_arch(char **cmdline_p)
{
	/*
	 * print what head.S has found out about the machine
	 */
	if (MACHINE_IS_VM)
		pr_info("Linux is running as a z/VM "
			"guest operating system in 64-bit mode\n");
	else if (MACHINE_IS_KVM)
		pr_info("Linux is running under KVM in 64-bit mode\n");
	else if (MACHINE_IS_LPAR)
		pr_info("Linux is running natively in 64-bit mode\n");
	else
		pr_info("Linux is running as a guest in 64-bit mode\n");

	log_component_list();

	/* Have one command line that is parsed and saved in /proc/cmdline */
	/* boot_command_line has been already set up in early.c */
	*cmdline_p = boot_command_line;

	ROOT_DEV = Root_RAM0;

	setup_initial_init_mm(_text, _etext, _edata, _end);

	if (IS_ENABLED(CONFIG_EXPOLINE_AUTO))
		nospec_auto_detect();

	jump_label_init();
	parse_early_param();
#ifdef CONFIG_CRASH_DUMP
	/* Deactivate elfcorehdr= kernel parameter */
	elfcorehdr_addr = ELFCORE_ADDR_MAX;
#endif

	os_info_init();
	setup_ipl();
	setup_task_size();
	setup_control_program_code();

	/* Do some memory reservations *before* memory is added to memblock */
	reserve_above_ident_map();
	reserve_kernel();
	reserve_initrd();
	reserve_certificate_list();
	reserve_mem_detect_info();
	memblock_allow_resize();

	/* Get information about *all* installed memory */
	memblock_add_mem_detect_info();

	free_mem_detect_info();

	relocate_amode31_section();
	setup_cr();

	setup_uv();
	setup_memory_end();
	setup_memory();
	dma_contiguous_reserve(ident_map_size);
	vmcp_cma_reserve();
	if (MACHINE_HAS_EDAT2)
		hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT);

	check_initrd();
	reserve_crashkernel();
#ifdef CONFIG_CRASH_DUMP
	/*
	 * Be aware that smp_save_dump_cpus() triggers a system reset.
	 * Therefore CPU and device initialization should be done
	 * afterwards.
	 */
	smp_save_dump_cpus();
#endif

	setup_resources();
	setup_lowcore_dat_off();
	smp_fill_possible_mask();
	cpu_detect_mhz_feature();
	cpu_init();
	numa_setup();
	smp_detect_cpus();
	topology_init_early();

	/*
	 * Create kernel page tables and switch to virtual addressing.
	 */
	paging_init();

	/*
	 * After paging_init created the kernel page table, the new PSWs
	 * in lowcore can now run with DAT enabled.
	 */
	setup_lowcore_dat_on();

	/* Setup default console */
	conmode_default();
	set_preferred_console();

	apply_alternative_instructions();
	if (IS_ENABLED(CONFIG_EXPOLINE))
		nospec_init_branches();

	/* Setup zfcp/nvme dump support */
	setup_zfcpdump();

	/* Add system specific data to the random pool */
	setup_randomness();
}