// SPDX-License-Identifier: GPL-2.0
/*
 *  S390 version
 *    Copyright IBM Corp. 1999, 2012
 *    Author(s): Hartmut Penner (hp@de.ibm.com),
 *               Martin Schwidefsky (schwidefsky@de.ibm.com)
 *
 *  Derived from "arch/i386/kernel/setup.c"
 *    Copyright (C) 1995, Linus Torvalds
 */

/*
 * This file handles the architecture-dependent parts of initialization
 */

#define KMSG_COMPONENT "setup"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/errno.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/sched/task.h>
#include <linux/cpu.h>
#include <linux/kernel.h>
#include <linux/memblock.h>
#include <linux/mm.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/ptrace.h>
#include <linux/random.h>
#include <linux/user.h>
#include <linux/tty.h>
#include <linux/ioport.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/initrd.h>
#include <linux/root_dev.h>
#include <linux/console.h>
#include <linux/kernel_stat.h>
#include <linux/dma-map-ops.h>
#include <linux/device.h>
#include <linux/notifier.h>
#include <linux/pfn.h>
#include <linux/ctype.h>
#include <linux/reboot.h>
#include <linux/topology.h>
#include <linux/kexec.h>
#include <linux/crash_dump.h>
#include <linux/memory.h>
#include <linux/compat.h>
#include <linux/start_kernel.h>
#include <linux/hugetlb.h>
#include <linux/kmemleak.h>

#include <asm/archrandom.h>
#include <asm/boot_data.h>
#include <asm/machine.h>
#include <asm/ipl.h>
#include <asm/facility.h>
#include <asm/smp.h>
#include <asm/mmu_context.h>
#include <asm/cpcmd.h>
#include <asm/abs_lowcore.h>
#include <asm/nmi.h>
#include <asm/irq.h>
#include <asm/page.h>
#include <asm/ptrace.h>
#include <asm/sections.h>
#include <asm/ebcdic.h>
#include <asm/diag.h>
#include <asm/os_info.h>
#include <asm/sclp.h>
#include <asm/stacktrace.h>
#include <asm/sysinfo.h>
#include <asm/numa.h>
#include <asm/alternative.h>
#include <asm/nospec-branch.h>
#include <asm/physmem_info.h>
#include <asm/maccess.h>
#include <asm/uv.h>
#include <asm/asm-offsets.h>
#include "entry.h"

/*
 * Machine setup..
 */
unsigned int console_mode = 0;
EXPORT_SYMBOL(console_mode);

unsigned int console_devno = -1;
EXPORT_SYMBOL(console_devno);

unsigned int console_irq = -1;
EXPORT_SYMBOL(console_irq);
/*
 * Some code and data needs to stay below 2 GB, even when the kernel is
 * relocated above 2 GB, because it has to use 31 bit addresses.
 * Such code and data is part of the .amode31 section.
 */
char __amode31_ref *__samode31 = _samode31;
char __amode31_ref *__eamode31 = _eamode31;
char __amode31_ref *__stext_amode31 = _stext_amode31;
char __amode31_ref *__etext_amode31 = _etext_amode31;
struct exception_table_entry __amode31_ref *__start_amode31_ex_table = _start_amode31_ex_table;
struct exception_table_entry __amode31_ref *__stop_amode31_ex_table = _stop_amode31_ex_table;

/*
 * Control registers CR2, CR5 and CR15 are initialized with addresses
 * of tables that must be placed below 2G, which is handled by the
 * AMODE31 sections.
 * Because the AMODE31 sections are relocated below 2G at startup,
 * the content of control registers CR2, CR5 and CR15 must be updated
 * with new addresses after the relocation. The control registers are
 * initialized in head64.S and updated again after the AMODE31
 * relocation. We must access the relevant AMODE31 tables indirectly
 * via pointers placed in the .amode31.refs linker section. Those
 * pointers get updated automatically during AMODE31 relocation and
 * always contain a valid address within the AMODE31 sections.
 */

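/*
 * A minimal illustration of the pattern used below (example_table and
 * example_ref are made-up names, not part of this file): the table
 * itself is placed in the AMODE31 data section, while all references
 * go through a pointer in .amode31.refs that is fixed up by
 * relocate_amode31_section():
 *
 *	static __amode31_data u32 example_table[4] __aligned(64);
 *	static u32 __amode31_ref *example_ref = example_table;
 *
 * After relocation only example_ref points into the relocated section;
 * the original table location is zeroed out.
 */
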
static __amode31_data u32 __ctl_duct_amode31[16] __aligned(64);

static __amode31_data u64 __ctl_aste_amode31[8] __aligned(64) = {
	[1] = 0xffffffffffffffff
};

static __amode31_data u32 __ctl_duald_amode31[32] __aligned(128) = {
	0x80000000, 0, 0, 0,
	0x80000000, 0, 0, 0,
	0x80000000, 0, 0, 0,
	0x80000000, 0, 0, 0,
	0x80000000, 0, 0, 0,
	0x80000000, 0, 0, 0,
	0x80000000, 0, 0, 0,
	0x80000000, 0, 0, 0
};

static __amode31_data u32 __ctl_linkage_stack_amode31[8] __aligned(64) = {
	0, 0, 0x89000000, 0,
	0, 0, 0x8a000000, 0
};

static u64 __amode31_ref *__ctl_aste = __ctl_aste_amode31;
static u32 __amode31_ref *__ctl_duald = __ctl_duald_amode31;
static u32 __amode31_ref *__ctl_linkage_stack = __ctl_linkage_stack_amode31;
static u32 __amode31_ref *__ctl_duct = __ctl_duct_amode31;

unsigned long __bootdata_preserved(max_mappable);
struct physmem_info __bootdata(physmem_info);

struct vm_layout __bootdata_preserved(vm_layout);
EXPORT_SYMBOL(vm_layout);
int __bootdata_preserved(__kaslr_enabled);
unsigned int __bootdata_preserved(zlib_dfltcc_support);
EXPORT_SYMBOL(zlib_dfltcc_support);
u64 __bootdata_preserved(stfle_fac_list[16]);
EXPORT_SYMBOL(stfle_fac_list);
struct oldmem_data __bootdata_preserved(oldmem_data);

char __bootdata(boot_rb)[PAGE_SIZE * 2];
bool __bootdata(boot_earlyprintk);
size_t __bootdata(boot_rb_off);
char __bootdata(bootdebug_filter)[128];
bool __bootdata(bootdebug);

unsigned long __bootdata_preserved(VMALLOC_START);
EXPORT_SYMBOL(VMALLOC_START);

unsigned long __bootdata_preserved(VMALLOC_END);
EXPORT_SYMBOL(VMALLOC_END);

struct page *__bootdata_preserved(vmemmap);
EXPORT_SYMBOL(vmemmap);
unsigned long __bootdata_preserved(vmemmap_size);

unsigned long __bootdata_preserved(MODULES_VADDR);
unsigned long __bootdata_preserved(MODULES_END);

/* An array with a pointer to the lowcore of every CPU. */
struct lowcore *lowcore_ptr[NR_CPUS];
EXPORT_SYMBOL(lowcore_ptr);

/*
 * The Write Back bit position in the physaddr is given by the SLPC PCI.
 * Leaving the mask zero always uses write through, which is safe.
 */
unsigned long mio_wb_bit_mask __ro_after_init;

/*
 * This is set up by the setup routine at boot time.
 * For s390 we need to find out what we have to set up,
 * using address 0x10400 ...
 */

#include <asm/setup.h>

/*
 * condev= and conmode= setup parameters.
 */

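/*
 * Example (illustrative values): booting with "condev=0x001f conmode=3215"
 * selects console device number 0x001f and, if CONFIG_TN3215_CONSOLE is
 * enabled, forces the 3215 console driver.
 */
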
static int __init condev_setup(char *str)
{
	int vdev;

	vdev = simple_strtoul(str, &str, 0);
	if (vdev >= 0 && vdev < 65536) {
		console_devno = vdev;
		console_irq = -1;
	}
	return 1;
}

__setup("condev=", condev_setup);

static void __init set_preferred_console(void)
{
	if (CONSOLE_IS_3215 || CONSOLE_IS_SCLP)
		add_preferred_console("ttyS", 0, NULL);
	else if (CONSOLE_IS_3270)
		add_preferred_console("tty3270", 0, NULL);
	else if (CONSOLE_IS_VT220)
		add_preferred_console("ttysclp", 0, NULL);
	else if (CONSOLE_IS_HVC)
		add_preferred_console("hvc", 0, NULL);
}

static int __init conmode_setup(char *str)
{
#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
	if (!strcmp(str, "hwc") || !strcmp(str, "sclp"))
		SET_CONSOLE_SCLP;
#endif
#if defined(CONFIG_TN3215_CONSOLE)
	if (!strcmp(str, "3215"))
		SET_CONSOLE_3215;
#endif
#if defined(CONFIG_TN3270_CONSOLE)
	if (!strcmp(str, "3270"))
		SET_CONSOLE_3270;
#endif
	set_preferred_console();
	return 1;
}

__setup("conmode=", conmode_setup);

static void __init conmode_default(void)
{
	char query_buffer[1024];
	char *ptr;

	if (machine_is_vm()) {
		cpcmd("QUERY CONSOLE", query_buffer, 1024, NULL);
		console_devno = simple_strtoul(query_buffer + 5, NULL, 16);
		ptr = strstr(query_buffer, "SUBCHANNEL =");
		console_irq = simple_strtoul(ptr + 13, NULL, 16);
		cpcmd("QUERY TERM", query_buffer, 1024, NULL);
		ptr = strstr(query_buffer, "CONMODE");
		/*
		 * Set the conmode to 3215 so that the device recognition
		 * will set the cu_type of the console to 3215. If the
		 * conmode is 3270 and we don't set it back then both
		 * 3215 and the 3270 driver will try to access the console
		 * device (3215 as console and 3270 as normal tty).
		 */
		cpcmd("TERM CONMODE 3215", NULL, 0, NULL);
		if (ptr == NULL) {
#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
			SET_CONSOLE_SCLP;
#endif
			return;
		}
		if (str_has_prefix(ptr + 8, "3270")) {
#if defined(CONFIG_TN3270_CONSOLE)
			SET_CONSOLE_3270;
#elif defined(CONFIG_TN3215_CONSOLE)
			SET_CONSOLE_3215;
#elif defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
			SET_CONSOLE_SCLP;
#endif
		} else if (str_has_prefix(ptr + 8, "3215")) {
#if defined(CONFIG_TN3215_CONSOLE)
			SET_CONSOLE_3215;
#elif defined(CONFIG_TN3270_CONSOLE)
			SET_CONSOLE_3270;
#elif defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
			SET_CONSOLE_SCLP;
#endif
		}
	} else if (machine_is_kvm()) {
		if (sclp.has_vt220 && IS_ENABLED(CONFIG_SCLP_VT220_CONSOLE))
			SET_CONSOLE_VT220;
		else if (sclp.has_linemode && IS_ENABLED(CONFIG_SCLP_CONSOLE))
			SET_CONSOLE_SCLP;
		else
			SET_CONSOLE_HVC;
	} else {
#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
		SET_CONSOLE_SCLP;
#endif
	}
}

#ifdef CONFIG_CRASH_DUMP
static void __init setup_zfcpdump(void)
{
	if (!is_ipl_type_dump())
		return;
	if (oldmem_data.start)
		return;
	strlcat(boot_command_line, " cio_ignore=all,!ipldev,!condev", COMMAND_LINE_SIZE);
	console_loglevel = 2;
}
#else
static inline void setup_zfcpdump(void) {}
#endif /* CONFIG_CRASH_DUMP */

/*
 * Reboot, halt and power_off stubs. They just call _machine_restart,
 * _machine_halt or _machine_power_off.
 */

void machine_restart(char *command)
{
	if ((!in_interrupt() && !in_atomic()) || oops_in_progress)
		/*
		 * Only unblank the console if we are called in enabled
		 * context or a bust_spinlocks cleared the way for us.
		 */
		console_unblank();
	_machine_restart(command);
}

void machine_halt(void)
{
	if (!in_interrupt() || oops_in_progress)
		/*
		 * Only unblank the console if we are called in enabled
		 * context or a bust_spinlocks cleared the way for us.
		 */
		console_unblank();
	_machine_halt();
}

void machine_power_off(void)
{
	if (!in_interrupt() || oops_in_progress)
		/*
		 * Only unblank the console if we are called in enabled
		 * context or a bust_spinlocks cleared the way for us.
		 */
		console_unblank();
	_machine_power_off();
}

/*
 * Default power off function, may be overridden by drivers.
 */
void (*pm_power_off)(void) = machine_power_off;
EXPORT_SYMBOL_GPL(pm_power_off);

void *restart_stack;

unsigned long stack_alloc(void)
{
	void *stack;

	stack = __vmalloc_node(THREAD_SIZE, THREAD_SIZE, THREADINFO_GFP,
			       NUMA_NO_NODE, __builtin_return_address(0));
	kmemleak_not_leak(stack);
	return (unsigned long)stack;
}

void stack_free(unsigned long stack)
{
	vfree((void *)stack);
}

static unsigned long __init stack_alloc_early(void)
{
	unsigned long stack;

	stack = (unsigned long)memblock_alloc_or_panic(THREAD_SIZE, THREAD_SIZE);
	return stack;
}

static void __init setup_lowcore(void)
{
	struct lowcore *lc, *abs_lc;

	/*
	 * Setup lowcore for boot cpu
	 */
	BUILD_BUG_ON(sizeof(struct lowcore) != LC_PAGES * PAGE_SIZE);
	lc = memblock_alloc_low(sizeof(*lc), sizeof(*lc));
	if (!lc)
		panic("%s: Failed to allocate %zu bytes align=%zx\n",
		      __func__, sizeof(*lc), sizeof(*lc));

	lc->pcpu = (unsigned long)per_cpu_ptr(&pcpu_devices, 0);
	lc->restart_psw.mask = PSW_KERNEL_BITS & ~PSW_MASK_DAT;
	lc->restart_psw.addr = __pa(restart_int_handler);
	lc->external_new_psw.mask = PSW_KERNEL_BITS;
	lc->external_new_psw.addr = (unsigned long) ext_int_handler;
	lc->svc_new_psw.mask = PSW_KERNEL_BITS;
	lc->svc_new_psw.addr = (unsigned long) system_call;
	lc->program_new_psw.mask = PSW_KERNEL_BITS;
	lc->program_new_psw.addr = (unsigned long) pgm_check_handler;
	lc->mcck_new_psw.mask = PSW_KERNEL_BITS;
	lc->mcck_new_psw.addr = (unsigned long) mcck_int_handler;
	lc->io_new_psw.mask = PSW_KERNEL_BITS;
	lc->io_new_psw.addr = (unsigned long) io_int_handler;
	lc->clock_comparator = clock_comparator_max;
	lc->current_task = (unsigned long)&init_task;
	lc->lpp = LPP_MAGIC;
	lc->machine_flags = get_lowcore()->machine_flags;
	lc->preempt_count = get_lowcore()->preempt_count;
	nmi_alloc_mcesa_early(&lc->mcesad);
	lc->sys_enter_timer = get_lowcore()->sys_enter_timer;
	lc->exit_timer = get_lowcore()->exit_timer;
	lc->user_timer = get_lowcore()->user_timer;
	lc->system_timer = get_lowcore()->system_timer;
	lc->steal_timer = get_lowcore()->steal_timer;
	lc->last_update_timer = get_lowcore()->last_update_timer;
	lc->last_update_clock = get_lowcore()->last_update_clock;
	/*
	 * Allocate the global restart stack which is the same for
	 * all CPUs in case *one* of them does a PSW restart.
	 */
	restart_stack = (void *)(stack_alloc_early() + STACK_INIT_OFFSET);
	lc->mcck_stack = stack_alloc_early() + STACK_INIT_OFFSET;
	lc->async_stack = stack_alloc_early() + STACK_INIT_OFFSET;
	lc->nodat_stack = stack_alloc_early() + STACK_INIT_OFFSET;
	lc->kernel_stack = get_lowcore()->kernel_stack;
	/*
	 * Set up PSW restart to call ipl.c:do_restart(). Copy the relevant
	 * restart data to the absolute zero lowcore. This is necessary if
	 * PSW restart is done on an offline CPU that has lowcore zero.
	 */
	lc->restart_stack = (unsigned long) restart_stack;
	lc->restart_fn = (unsigned long) do_restart;
	lc->restart_data = 0;
	lc->restart_source = -1U;
	lc->spinlock_lockval = arch_spin_lockval(0);
	lc->spinlock_index = 0;
	arch_spin_lock_setup(0);
	lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW);
	lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW);
	lc->preempt_count = PREEMPT_DISABLED;
	lc->kernel_asce = get_lowcore()->kernel_asce;
	lc->user_asce = get_lowcore()->user_asce;

	system_ctlreg_init_save_area(lc);
	abs_lc = get_abs_lowcore();
	abs_lc->restart_stack = lc->restart_stack;
	abs_lc->restart_fn = lc->restart_fn;
	abs_lc->restart_data = lc->restart_data;
	abs_lc->restart_source = lc->restart_source;
	abs_lc->restart_psw = lc->restart_psw;
	abs_lc->restart_flags = RESTART_FLAG_CTLREGS;
	abs_lc->program_new_psw = lc->program_new_psw;
	abs_lc->mcesad = lc->mcesad;
	put_abs_lowcore(abs_lc);

	set_prefix(__pa(lc));
	lowcore_ptr[0] = lc;
	if (abs_lowcore_map(0, lowcore_ptr[0], false))
		panic("Couldn't setup absolute lowcore");
}

static struct resource code_resource = {
	.name  = "Kernel code",
	.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
};

static struct resource data_resource = {
	.name = "Kernel data",
	.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
};

static struct resource bss_resource = {
	.name = "Kernel bss",
	.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
};

static struct resource __initdata *standard_resources[] = {
	&code_resource,
	&data_resource,
	&bss_resource,
};

static void __init setup_resources(void)
{
	struct resource *res, *std_res, *sub_res;
	phys_addr_t start, end;
	int j;
	u64 i;

	code_resource.start = __pa_symbol(_text);
	code_resource.end = __pa_symbol(_etext) - 1;
	data_resource.start = __pa_symbol(_etext);
	data_resource.end = __pa_symbol(_edata) - 1;
	bss_resource.start = __pa_symbol(__bss_start);
	bss_resource.end = __pa_symbol(__bss_stop) - 1;

	for_each_mem_range(i, &start, &end) {
		res = memblock_alloc_or_panic(sizeof(*res), 8);
		res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;

		res->name = "System RAM";
		res->start = start;
		/*
		 * In memblock, end points to the first byte after the
		 * range while in resources, end points to the last byte in
		 * the range.
		 */
		res->end = end - 1;
		request_resource(&iomem_resource, res);

		for (j = 0; j < ARRAY_SIZE(standard_resources); j++) {
			std_res = standard_resources[j];
			if (std_res->start < res->start ||
			    std_res->start > res->end)
				continue;
			if (std_res->end > res->end) {
				sub_res = memblock_alloc_or_panic(sizeof(*sub_res), 8);
				*sub_res = *std_res;
				sub_res->end = res->end;
				std_res->start = res->end + 1;
				request_resource(res, sub_res);
			} else {
				request_resource(res, std_res);
			}
		}
	}
#ifdef CONFIG_CRASH_DUMP
	/*
	 * Re-add removed crash kernel memory as reserved memory. This makes
	 * sure it will be mapped with the identity mapping and struct pages
	 * will be created, so it can be resized later on.
	 * However, add it later, since the crash kernel resource should not
	 * be part of the System RAM resource.
	 */
	if (crashk_res.end) {
		memblock_add_node(crashk_res.start, resource_size(&crashk_res),
				  0, MEMBLOCK_NONE);
		memblock_reserve(crashk_res.start, resource_size(&crashk_res));
		insert_resource(&iomem_resource, &crashk_res);
	}
#endif
}

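/*
 * Illustrative /proc/iomem excerpt produced by setup_resources() (all
 * addresses are made up):
 *
 *	00000000-6fffffff : System RAM
 *	  00e4e000-01b616f3 : Kernel code
 *	  01b616f4-02484b3f : Kernel data
 *	  0257a000-027880ef : Kernel bss
 */
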
static void __init setup_memory_end(void)
{
	max_pfn = max_low_pfn = PFN_DOWN(ident_map_size);
	pr_notice("The maximum memory size is %luMB\n", ident_map_size >> 20);
}

#ifdef CONFIG_CRASH_DUMP

/*
 * When kdump is enabled, we have to ensure that no memory from the area
 * [0 - crashkernel memory size] is set offline - it will be exchanged with
 * the crashkernel memory region when kdump is triggered. The crashkernel
 * memory region can never get offlined (pages are unmovable).
 */
static int kdump_mem_notifier(struct notifier_block *nb,
			      unsigned long action, void *data)
{
	struct memory_notify *arg = data;

	if (action != MEM_GOING_OFFLINE)
		return NOTIFY_OK;
	if (arg->start_pfn < PFN_DOWN(resource_size(&crashk_res)))
		return NOTIFY_BAD;
	return NOTIFY_OK;
}

static struct notifier_block kdump_mem_nb = {
	.notifier_call = kdump_mem_notifier,
};

#endif

/*
 * Reserve page tables created by decompressor
 */
static void __init reserve_pgtables(void)
{
	unsigned long start, end;
	struct reserved_range *range;

	for_each_physmem_reserved_type_range(RR_VMEM, range, &start, &end)
		memblock_reserve(start, end - start);
}

/*
 * Reserve memory for kdump kernel to be loaded with kexec
 */
static void __init reserve_crashkernel(void)
{
#ifdef CONFIG_CRASH_DUMP
	unsigned long long crash_base, crash_size;
	phys_addr_t low, high;
	int rc;

	rc = parse_crashkernel(boot_command_line, ident_map_size,
			       &crash_size, &crash_base, NULL, NULL);

	crash_base = ALIGN(crash_base, KEXEC_CRASH_MEM_ALIGN);
	crash_size = ALIGN(crash_size, KEXEC_CRASH_MEM_ALIGN);
	if (rc || crash_size == 0)
		return;

	if (memblock.memory.regions[0].size < crash_size) {
		pr_info("crashkernel reservation failed: %s\n",
			"first memory chunk must be at least crashkernel size");
		return;
	}

	low = crash_base ?: oldmem_data.start;
	high = low + crash_size;
	if (low >= oldmem_data.start && high <= oldmem_data.start + oldmem_data.size) {
		/* The crashkernel fits into OLDMEM, reuse OLDMEM */
		crash_base = low;
	} else {
		/* Find suitable area in free memory */
		low = max_t(unsigned long, crash_size, sclp.hsa_size);
		high = crash_base ? crash_base + crash_size : ULONG_MAX;

		if (crash_base && crash_base < low) {
			pr_info("crashkernel reservation failed: %s\n",
				"crash_base too low");
			return;
		}
		low = crash_base ?: low;
		crash_base = memblock_phys_alloc_range(crash_size,
						       KEXEC_CRASH_MEM_ALIGN,
						       low, high);
	}

	if (!crash_base) {
		pr_info("crashkernel reservation failed: %s\n",
			"no suitable area found");
		return;
	}

	if (register_memory_notifier(&kdump_mem_nb)) {
		memblock_phys_free(crash_base, crash_size);
		return;
	}

	if (!oldmem_data.start && machine_is_vm())
		diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size));
	crashk_res.start = crash_base;
	crashk_res.end = crash_base + crash_size - 1;
	memblock_remove(crash_base, crash_size);
	pr_info("Reserving %lluMB of memory at %lluMB "
		"for crashkernel (System RAM: %luMB)\n",
		crash_size >> 20, crash_base >> 20,
		(unsigned long)memblock.memory.total_size >> 20);
	os_info_crashkernel_add(crash_base, crash_size);
#endif
}

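/*
 * Example (illustrative sizes): "crashkernel=512M" on the kernel command
 * line makes the code above reserve a 512 MB region for the kdump kernel;
 * "crashkernel=512M@1G" additionally requests the base address 1 GB.
 */
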
/*
 * Reserve the initrd from being used by memblock
 */
static void __init reserve_initrd(void)
{
	unsigned long addr, size;

	if (!IS_ENABLED(CONFIG_BLK_DEV_INITRD) || !get_physmem_reserved(RR_INITRD, &addr, &size))
		return;
	initrd_start = (unsigned long)__va(addr);
	initrd_end = initrd_start + size;
	memblock_reserve(addr, size);
}

/*
 * Reserve the memory area used to pass the certificate lists
 */
static void __init reserve_certificate_list(void)
{
	if (ipl_cert_list_addr)
		memblock_reserve(ipl_cert_list_addr, ipl_cert_list_size);
}

static void __init reserve_physmem_info(void)
{
	unsigned long addr, size;

	if (get_physmem_reserved(RR_MEM_DETECT_EXT, &addr, &size))
		memblock_reserve(addr, size);
}

static void __init free_physmem_info(void)
{
	unsigned long addr, size;

	if (get_physmem_reserved(RR_MEM_DETECT_EXT, &addr, &size))
		memblock_phys_free(addr, size);
}

static void __init memblock_add_physmem_info(void)
{
	unsigned long start, end;
	int i;

	pr_debug("physmem info source: %s (%hhd)\n",
		 get_physmem_info_source(), physmem_info.info_source);
	/* keep memblock lists close to the kernel */
	memblock_set_bottom_up(true);
	for_each_physmem_usable_range(i, &start, &end)
		memblock_add(start, end - start);
	for_each_physmem_online_range(i, &start, &end)
		memblock_physmem_add(start, end - start);
	memblock_set_bottom_up(false);
	memblock_set_node(0, ULONG_MAX, &memblock.memory, 0);
}

/*
 * Reserve memory used for lowcore.
 */
static void __init reserve_lowcore(void)
{
	void *lowcore_start = get_lowcore();
	void *lowcore_end = lowcore_start + sizeof(struct lowcore);
	void *start, *end;

	if (absolute_pointer(__identity_base) < lowcore_end) {
		start = max(lowcore_start, (void *)__identity_base);
		end = min(lowcore_end, (void *)(__identity_base + ident_map_size));
		/* memblock_reserve() takes a size, not an end address */
		memblock_reserve(__pa(start), end - start);
	}
}

/*
 * Reserve memory used for absolute lowcore/command line/kernel image.
 */
static void __init reserve_kernel(void)
{
	memblock_reserve(0, STARTUP_NORMAL_OFFSET);
	memblock_reserve(OLDMEM_BASE, sizeof(unsigned long));
	memblock_reserve(OLDMEM_SIZE, sizeof(unsigned long));
	memblock_reserve(physmem_info.reserved[RR_AMODE31].start, __eamode31 - __samode31);
	memblock_reserve(__pa(sclp_early_sccb), EXT_SCCB_READ_SCP);
	memblock_reserve(__pa(_stext), _end - _stext);
}

static void __init setup_memory(void)
{
	phys_addr_t start, end;
	u64 i;

	/*
	 * Init storage key for present memory
	 */
	for_each_mem_range(i, &start, &end)
		storage_key_init_range(start, end);

	psw_set_key(PAGE_DEFAULT_KEY);
}

static void __init relocate_amode31_section(void)
{
	unsigned long amode31_size = __eamode31 - __samode31;
	long amode31_offset, *ptr;

	amode31_offset = AMODE31_START - (unsigned long)__samode31;
	pr_info("Relocating AMODE31 section of size 0x%08lx\n", amode31_size);

	/* Move original AMODE31 section to the new one */
	memmove((void *)physmem_info.reserved[RR_AMODE31].start, __samode31, amode31_size);
	/* Zero out the old AMODE31 section to catch invalid accesses within it */
	memset(__samode31, 0, amode31_size);

	/* Update all AMODE31 region references */
	for (ptr = _start_amode31_refs; ptr != _end_amode31_refs; ptr++)
		*ptr += amode31_offset;
}

/* This must be called after AMODE31 relocation */
static void __init setup_cr(void)
{
	union ctlreg2 cr2;
	union ctlreg5 cr5;
	union ctlreg15 cr15;

	__ctl_duct[1] = (unsigned long)__ctl_aste;
	__ctl_duct[2] = (unsigned long)__ctl_aste;
	__ctl_duct[4] = (unsigned long)__ctl_duald;

	/* Update control registers CR2, CR5 and CR15 */
	local_ctl_store(2, &cr2.reg);
	local_ctl_store(5, &cr5.reg);
	local_ctl_store(15, &cr15.reg);
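	/*
	 * The origins stored in CR2 and CR5 must be 64-byte aligned,
	 * hence the shift by 6; the linkage stack entry address in CR15
	 * is 8-byte aligned, hence the shift by 3.
	 */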
	cr2.ducto = (unsigned long)__ctl_duct >> 6;
	cr5.pasteo = (unsigned long)__ctl_duct >> 6;
	cr15.lsea = (unsigned long)__ctl_linkage_stack >> 3;
	system_ctl_load(2, &cr2.reg);
	system_ctl_load(5, &cr5.reg);
	system_ctl_load(15, &cr15.reg);
}

/*
 * Add system information as device randomness
 */
static void __init setup_randomness(void)
{
	struct sysinfo_3_2_2 *vmms;

	vmms = memblock_alloc_or_panic(PAGE_SIZE, PAGE_SIZE);
	if (stsi(vmms, 3, 2, 2) == 0 && vmms->count)
		add_device_randomness(&vmms->vm, sizeof(vmms->vm[0]) * vmms->count);
	memblock_free(vmms, PAGE_SIZE);

	if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG))
		static_branch_enable(&s390_arch_random_available);
}

/*
 * Issue diagnose 318 to set the control program name and
 * version codes.
 */
static void __init setup_control_program_code(void)
{
	union diag318_info diag318_info = {
		.cpnc = CPNC_LINUX,
		.cpvc = 0,
	};

	if (!sclp.has_diag318)
		return;

	diag_stat_inc(DIAG_STAT_X318);
	asm volatile("diag %0,0,0x318\n" : : "d" (diag318_info.val));
}

/*
 * Print the component list from the IPL report
 */
static void __init log_component_list(void)
{
	struct ipl_rb_component_entry *ptr, *end;
	char *str;

	if (!early_ipl_comp_list_addr)
		return;
	if (ipl_block.hdr.flags & IPL_PL_FLAG_SIPL)
		pr_info("Linux is running with Secure-IPL enabled\n");
	else
		pr_info("Linux is running with Secure-IPL disabled\n");
	ptr = __va(early_ipl_comp_list_addr);
	end = (void *) ptr + early_ipl_comp_list_size;
	pr_info("The IPL report contains the following components:\n");
	while (ptr < end) {
		if (ptr->flags & IPL_RB_COMPONENT_FLAG_SIGNED) {
			if (ptr->flags & IPL_RB_COMPONENT_FLAG_VERIFIED)
				str = "signed, verified";
			else
				str = "signed, verification failed";
		} else {
			str = "not signed";
		}
		pr_info("%016llx - %016llx (%s)\n",
			ptr->addr, ptr->addr + ptr->len, str);
		ptr++;
	}
}

/*
 * Print buf without interpreting any '%' characters it contains, and
 * take the bootdebug option into consideration.
 */
static void __init print_rb_entry(const char *buf)
{
	char fmt[] = KERN_SOH "0boot: %s";
	int level = printk_get_level(buf);

	buf = skip_timestamp(printk_skip_level(buf));
	if (level == KERN_DEBUG[1] && (!bootdebug || !bootdebug_filter_match(buf)))
		return;

	fmt[1] = level;
	printk(fmt, buf);
}

/*
 * Setup function called from init/main.c just after the banner
 * was printed.
 */

void __init setup_arch(char **cmdline_p)
{
	/*
	 * print what head.S has found out about the machine
	 */
	if (machine_is_vm())
		pr_info("Linux is running as a z/VM "
			"guest operating system in 64-bit mode\n");
	else if (machine_is_kvm())
		pr_info("Linux is running under KVM in 64-bit mode\n");
	else if (machine_is_lpar())
		pr_info("Linux is running natively in 64-bit mode\n");
	else
		pr_info("Linux is running as a guest in 64-bit mode\n");
	/* Print decompressor messages if not already printed */
	if (!boot_earlyprintk)
		boot_rb_foreach(print_rb_entry);

	if (machine_has_relocated_lowcore())
		pr_info("Lowcore relocated to 0x%px\n", get_lowcore());

	log_component_list();

	/* Have one command line that is parsed and saved in /proc/cmdline */
	/* boot_command_line has already been set up in early.c */
	*cmdline_p = boot_command_line;

	ROOT_DEV = Root_RAM0;

	setup_initial_init_mm(_text, _etext, _edata, _end);

	if (IS_ENABLED(CONFIG_EXPOLINE_AUTO))
		nospec_auto_detect();

	jump_label_init();
	parse_early_param();
#ifdef CONFIG_CRASH_DUMP
	/* Deactivate elfcorehdr= kernel parameter */
	elfcorehdr_addr = ELFCORE_ADDR_MAX;
#endif

	os_info_init();
	setup_ipl();
	setup_control_program_code();

	/* Do some memory reservations *before* memory is added to memblock */
	reserve_pgtables();
	reserve_lowcore();
	reserve_kernel();
	reserve_initrd();
	reserve_certificate_list();
	reserve_physmem_info();
	memblock_set_current_limit(ident_map_size);
	memblock_allow_resize();

	/* Get information about *all* installed memory */
	memblock_add_physmem_info();

	free_physmem_info();
	setup_memory_end();
	memblock_dump_all();
	setup_memory();

	relocate_amode31_section();
	setup_cr();
	setup_uv();
	dma_contiguous_reserve(ident_map_size);
	vmcp_cma_reserve();
	if (cpu_has_edat2())
		hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT);

	reserve_crashkernel();
#ifdef CONFIG_CRASH_DUMP
	/*
	 * Be aware that smp_save_dump_secondary_cpus() triggers a system reset.
	 * Therefore CPU and device initialization should be done afterwards.
	 */
	smp_save_dump_secondary_cpus();
#endif

	setup_resources();
	setup_lowcore();
	smp_fill_possible_mask();
	cpu_detect_mhz_feature();
	cpu_init();
	numa_setup();
	smp_detect_cpus();
	topology_init_early();
	setup_protection_map();
	/*
	 * Create kernel page tables.
	 */
	paging_init();

	/*
	 * After paging_init created the kernel page table, the new PSWs
	 * in lowcore can now run with DAT enabled.
	 */
#ifdef CONFIG_CRASH_DUMP
	smp_save_dump_ipl_cpu();
#endif

	/* Setup default console */
	conmode_default();
	set_preferred_console();

	apply_alternative_instructions();
	if (IS_ENABLED(CONFIG_EXPOLINE))
		nospec_init_branches();

	/* Setup zfcp/nvme dump support */
	setup_zfcpdump();

	/* Add system specific data to the random pool */
	setup_randomness();
}

void __init arch_cpu_finalize_init(void)
{
	sclp_init();
}