xref: /linux/arch/s390/kernel/setup.c (revision d90d90a1978af6530c7d8b201c4ab117d0506b1a)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  *  S390 version
4  *    Copyright IBM Corp. 1999, 2012
5  *    Author(s): Hartmut Penner (hp@de.ibm.com),
6  *               Martin Schwidefsky (schwidefsky@de.ibm.com)
7  *
8  *  Derived from "arch/i386/kernel/setup.c"
9  *    Copyright (C) 1995, Linus Torvalds
10  */
11 
12 /*
13  * This file handles the architecture-dependent parts of initialization
14  */
15 
16 #define KMSG_COMPONENT "setup"
17 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
18 
19 #include <linux/errno.h>
20 #include <linux/export.h>
21 #include <linux/sched.h>
22 #include <linux/sched/task.h>
23 #include <linux/cpu.h>
24 #include <linux/kernel.h>
25 #include <linux/memblock.h>
26 #include <linux/mm.h>
27 #include <linux/stddef.h>
28 #include <linux/unistd.h>
29 #include <linux/ptrace.h>
30 #include <linux/random.h>
31 #include <linux/user.h>
32 #include <linux/tty.h>
33 #include <linux/ioport.h>
34 #include <linux/delay.h>
35 #include <linux/init.h>
36 #include <linux/initrd.h>
37 #include <linux/root_dev.h>
38 #include <linux/console.h>
39 #include <linux/kernel_stat.h>
40 #include <linux/dma-map-ops.h>
41 #include <linux/device.h>
42 #include <linux/notifier.h>
43 #include <linux/pfn.h>
44 #include <linux/ctype.h>
45 #include <linux/reboot.h>
46 #include <linux/topology.h>
47 #include <linux/kexec.h>
48 #include <linux/crash_dump.h>
49 #include <linux/memory.h>
50 #include <linux/compat.h>
51 #include <linux/start_kernel.h>
52 #include <linux/hugetlb.h>
53 #include <linux/kmemleak.h>
54 
55 #include <asm/archrandom.h>
56 #include <asm/boot_data.h>
57 #include <asm/ipl.h>
58 #include <asm/facility.h>
59 #include <asm/smp.h>
60 #include <asm/mmu_context.h>
61 #include <asm/cpcmd.h>
62 #include <asm/abs_lowcore.h>
63 #include <asm/nmi.h>
64 #include <asm/irq.h>
65 #include <asm/page.h>
66 #include <asm/ptrace.h>
67 #include <asm/sections.h>
68 #include <asm/ebcdic.h>
69 #include <asm/diag.h>
70 #include <asm/os_info.h>
71 #include <asm/sclp.h>
72 #include <asm/stacktrace.h>
73 #include <asm/sysinfo.h>
74 #include <asm/numa.h>
75 #include <asm/alternative.h>
76 #include <asm/nospec-branch.h>
77 #include <asm/mem_detect.h>
78 #include <asm/maccess.h>
79 #include <asm/uv.h>
80 #include <asm/asm-offsets.h>
81 #include "entry.h"
82 
83 /*
84  * Machine setup.
85  */
86 unsigned int console_mode = 0;
87 EXPORT_SYMBOL(console_mode);
88 
89 unsigned int console_devno = -1;
90 EXPORT_SYMBOL(console_devno);
91 
92 unsigned int console_irq = -1;
93 EXPORT_SYMBOL(console_irq);
94 
95 /*
96  * Some code and data needs to stay below 2 GB, even when the kernel would be
97  * relocated above 2 GB, because it has to use 31 bit addresses.
98  * Such code and data is part of the .amode31 section.
99  */
100 unsigned long __amode31_ref __samode31 = (unsigned long)&_samode31;
101 unsigned long __amode31_ref __eamode31 = (unsigned long)&_eamode31;
102 unsigned long __amode31_ref __stext_amode31 = (unsigned long)&_stext_amode31;
103 unsigned long __amode31_ref __etext_amode31 = (unsigned long)&_etext_amode31;
104 struct exception_table_entry __amode31_ref *__start_amode31_ex_table = _start_amode31_ex_table;
105 struct exception_table_entry __amode31_ref *__stop_amode31_ex_table = _stop_amode31_ex_table;
106 
107 /*
108  * Control registers CR2, CR5 and CR15 are initialized with addresses
109  * of tables that must be placed below 2G which is handled by the AMODE31
110  * sections.
111  * Because the AMODE31 sections are relocated below 2G at startup,
112  * the content of control registers CR2, CR5 and CR15 must be updated
113  * with the new addresses after the relocation. The control registers are
114  * first initialized in head64.S and then updated again after the AMODE31
115  * relocation. We must access the relevant AMODE31 tables indirectly via
116  * pointers placed in the .amode31.refs linker section. Those pointers get
117  * updated automatically during AMODE31 relocation and always contain a valid
118  * address within AMODE31 sections.
119  */
120 
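/*
 * Dispatchable-unit control table (DUCT). Its origin is loaded into
 * CR2 and (as the PASTE origin) into CR5 by setup_cr() below.
 */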
121 static __amode31_data u32 __ctl_duct_amode31[16] __aligned(64);
122 
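/*
 * ASN-second-table entry (ASTE); DUCT words 1 and 2 are pointed at
 * this table by setup_cr() below.
 */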
123 static __amode31_data u64 __ctl_aste_amode31[8] __aligned(64) = {
124 	[1] = 0xffffffffffffffff
125 };
126 
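/*
 * DUCT access-list designation (DUALD): eight access-list entries, each
 * apparently marked invalid via the 0x80000000 bit. DUCT word 4 is
 * pointed at this table by setup_cr() below.
 */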
127 static __amode31_data u32 __ctl_duald_amode31[32] __aligned(128) = {
128 	0x80000000, 0, 0, 0,
129 	0x80000000, 0, 0, 0,
130 	0x80000000, 0, 0, 0,
131 	0x80000000, 0, 0, 0,
132 	0x80000000, 0, 0, 0,
133 	0x80000000, 0, 0, 0,
134 	0x80000000, 0, 0, 0,
135 	0x80000000, 0, 0, 0
136 };
137 
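/*
 * Pre-initialized linkage-stack section; CR15 is pointed at it by
 * setup_cr() below. The 0x89/0x8a bytes presumably encode the header
 * and trailer entry types.
 */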
138 static __amode31_data u32 __ctl_linkage_stack_amode31[8] __aligned(64) = {
139 	0, 0, 0x89000000, 0,
140 	0, 0, 0x8a000000, 0
141 };
142 
143 static u64 __amode31_ref *__ctl_aste = __ctl_aste_amode31;
144 static u32 __amode31_ref *__ctl_duald = __ctl_duald_amode31;
145 static u32 __amode31_ref *__ctl_linkage_stack = __ctl_linkage_stack_amode31;
146 static u32 __amode31_ref *__ctl_duct = __ctl_duct_amode31;
147 
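/*
 * __bootdata variables are shared with the decompressor and are part of
 * the init section, so they are discarded after boot; __bootdata_preserved
 * variables are kept for later use in the decompressed kernel.
 */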
148 int __bootdata(noexec_disabled);
149 unsigned long __bootdata(ident_map_size);
150 struct mem_detect_info __bootdata(mem_detect);
151 struct initrd_data __bootdata(initrd_data);
152 unsigned long __bootdata(pgalloc_pos);
153 unsigned long __bootdata(pgalloc_end);
154 unsigned long __bootdata(pgalloc_low);
155 
156 unsigned long __bootdata_preserved(__kaslr_offset);
157 unsigned long __bootdata(__amode31_base);
158 unsigned int __bootdata_preserved(zlib_dfltcc_support);
159 EXPORT_SYMBOL(zlib_dfltcc_support);
160 u64 __bootdata_preserved(stfle_fac_list[16]);
161 EXPORT_SYMBOL(stfle_fac_list);
162 u64 __bootdata_preserved(alt_stfle_fac_list[16]);
163 struct oldmem_data __bootdata_preserved(oldmem_data);
164 
165 unsigned long VMALLOC_START;
166 EXPORT_SYMBOL(VMALLOC_START);
167 
168 unsigned long VMALLOC_END;
169 EXPORT_SYMBOL(VMALLOC_END);
170 
171 struct page *vmemmap;
172 EXPORT_SYMBOL(vmemmap);
173 unsigned long vmemmap_size;
174 
175 unsigned long MODULES_VADDR;
176 unsigned long MODULES_END;
177 
178 /* An array with a pointer to the lowcore of every CPU. */
179 struct lowcore *lowcore_ptr[NR_CPUS];
180 EXPORT_SYMBOL(lowcore_ptr);
181 
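/* Set in setup_arch() if the BEAR-enhancement facility (facility bit 193) is installed */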
182 DEFINE_STATIC_KEY_FALSE(cpu_has_bear);
183 
184 /*
185  * The Write Back bit position in the physaddr is given by the SLPC PCI.
186  * Leaving the mask zero always uses write through which is safe
187  */
188 unsigned long mio_wb_bit_mask __ro_after_init;
189 
190 /*
191  * This is set up by the setup routine at boot time.
192  * For S390 we need to find out what we have to set up,
193  * using address 0x10400 ...
194  */
195 
196 #include <asm/setup.h>
197 
198 /*
199  * condev= and conmode= setup parameters.
200  */
201 
202 static int __init condev_setup(char *str)
203 {
204 	int vdev;
205 
206 	vdev = simple_strtoul(str, &str, 0);
207 	if (vdev >= 0 && vdev < 65536) {
208 		console_devno = vdev;
209 		console_irq = -1;
210 	}
211 	return 1;
212 }
213 
214 __setup("condev=", condev_setup);
215 
216 static void __init set_preferred_console(void)
217 {
218 	if (CONSOLE_IS_3215 || CONSOLE_IS_SCLP)
219 		add_preferred_console("ttyS", 0, NULL);
220 	else if (CONSOLE_IS_3270)
221 		add_preferred_console("tty3270", 0, NULL);
222 	else if (CONSOLE_IS_VT220)
223 		add_preferred_console("ttysclp", 0, NULL);
224 	else if (CONSOLE_IS_HVC)
225 		add_preferred_console("hvc", 0, NULL);
226 }
227 
228 static int __init conmode_setup(char *str)
229 {
230 #if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
231 	if (!strcmp(str, "hwc") || !strcmp(str, "sclp"))
232 		SET_CONSOLE_SCLP;
233 #endif
234 #if defined(CONFIG_TN3215_CONSOLE)
235 	if (!strcmp(str, "3215"))
236 		SET_CONSOLE_3215;
237 #endif
238 #if defined(CONFIG_TN3270_CONSOLE)
239 	if (!strcmp(str, "3270"))
240 		SET_CONSOLE_3270;
241 #endif
242 	set_preferred_console();
243 	return 1;
244 }
245 
246 __setup("conmode=", conmode_setup);
247 
248 static void __init conmode_default(void)
249 {
250 	char query_buffer[1024];
251 	char *ptr;
252 
253 	if (MACHINE_IS_VM) {
254 		cpcmd("QUERY CONSOLE", query_buffer, 1024, NULL);
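		/*
		 * The device number and the subchannel are parsed from fixed
		 * offsets within the CP QUERY CONSOLE response.
		 */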
255 		console_devno = simple_strtoul(query_buffer + 5, NULL, 16);
256 		ptr = strstr(query_buffer, "SUBCHANNEL =");
257 		console_irq = simple_strtoul(ptr + 13, NULL, 16);
258 		cpcmd("QUERY TERM", query_buffer, 1024, NULL);
259 		ptr = strstr(query_buffer, "CONMODE");
260 		/*
261 		 * Set the conmode to 3215 so that the device recognition
262 		 * will set the cu_type of the console to 3215. If the
263 		 * conmode is 3270 and we don't set it back then both
264 		 * 3215 and the 3270 driver will try to access the console
265 		 * device (3215 as console and 3270 as normal tty).
266 		 */
267 		cpcmd("TERM CONMODE 3215", NULL, 0, NULL);
268 		if (ptr == NULL) {
269 #if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
270 			SET_CONSOLE_SCLP;
271 #endif
272 			return;
273 		}
274 		if (str_has_prefix(ptr + 8, "3270")) {
275 #if defined(CONFIG_TN3270_CONSOLE)
276 			SET_CONSOLE_3270;
277 #elif defined(CONFIG_TN3215_CONSOLE)
278 			SET_CONSOLE_3215;
279 #elif defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
280 			SET_CONSOLE_SCLP;
281 #endif
282 		} else if (str_has_prefix(ptr + 8, "3215")) {
283 #if defined(CONFIG_TN3215_CONSOLE)
284 			SET_CONSOLE_3215;
285 #elif defined(CONFIG_TN3270_CONSOLE)
286 			SET_CONSOLE_3270;
287 #elif defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
288 			SET_CONSOLE_SCLP;
289 #endif
290 		}
291 	} else if (MACHINE_IS_KVM) {
292 		if (sclp.has_vt220 && IS_ENABLED(CONFIG_SCLP_VT220_CONSOLE))
293 			SET_CONSOLE_VT220;
294 		else if (sclp.has_linemode && IS_ENABLED(CONFIG_SCLP_CONSOLE))
295 			SET_CONSOLE_SCLP;
296 		else
297 			SET_CONSOLE_HVC;
298 	} else {
299 #if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
300 		SET_CONSOLE_SCLP;
301 #endif
302 	}
303 }
304 
305 #ifdef CONFIG_CRASH_DUMP
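/*
 * For dump IPLs restrict device recognition to the IPL and console
 * devices and reduce the console loglevel.
 */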
306 static void __init setup_zfcpdump(void)
307 {
308 	if (!is_ipl_type_dump())
309 		return;
310 	if (oldmem_data.start)
311 		return;
312 	strcat(boot_command_line, " cio_ignore=all,!ipldev,!condev");
313 	console_loglevel = 2;
314 }
315 #else
316 static inline void setup_zfcpdump(void) {}
317 #endif /* CONFIG_CRASH_DUMP */
318 
319 /*
320  * Reboot, halt and power_off stubs. They just call _machine_restart,
321  * _machine_halt or _machine_power_off.
322  */
323 
324 void machine_restart(char *command)
325 {
326 	if ((!in_interrupt() && !in_atomic()) || oops_in_progress)
327 		/*
328 		 * Only unblank the console if we are called in enabled
329 		 * context or a bust_spinlocks cleared the way for us.
330 		 */
331 		console_unblank();
332 	_machine_restart(command);
333 }
334 
335 void machine_halt(void)
336 {
337 	if (!in_interrupt() || oops_in_progress)
338 		/*
339 		 * Only unblank the console if we are called in enabled
340 		 * context or a bust_spinlocks cleared the way for us.
341 		 */
342 		console_unblank();
343 	_machine_halt();
344 }
345 
346 void machine_power_off(void)
347 {
348 	if (!in_interrupt() || oops_in_progress)
349 		/*
350 		 * Only unblank the console if we are called in enabled
351 		 * context or a bust_spinlocks cleared the way for us.
352 		 */
353 		console_unblank();
354 	_machine_power_off();
355 }
356 
357 /*
358  * Dummy power off function.
359  */
360 void (*pm_power_off)(void) = machine_power_off;
361 EXPORT_SYMBOL_GPL(pm_power_off);
362 
363 void *restart_stack;
364 
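/*
 * Allocate a kernel stack: from the vmalloc area if CONFIG_VMAP_STACK
 * is enabled, from the page allocator otherwise.
 */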
365 unsigned long stack_alloc(void)
366 {
367 #ifdef CONFIG_VMAP_STACK
368 	void *ret;
369 
370 	ret = __vmalloc_node(THREAD_SIZE, THREAD_SIZE, THREADINFO_GFP,
371 			     NUMA_NO_NODE, __builtin_return_address(0));
372 	kmemleak_not_leak(ret);
373 	return (unsigned long)ret;
374 #else
375 	return __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER);
376 #endif
377 }
378 
379 void stack_free(unsigned long stack)
380 {
381 #ifdef CONFIG_VMAP_STACK
382 	vfree((void *) stack);
383 #else
384 	free_pages(stack, THREAD_SIZE_ORDER);
385 #endif
386 }
387 
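/* Early allocation of the async (interrupt) stack for the boot CPU */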
388 int __init arch_early_irq_init(void)
389 {
390 	unsigned long stack;
391 
392 	stack = __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER);
393 	if (!stack)
394 		panic("Couldn't allocate async stack");
395 	S390_lowcore.async_stack = stack + STACK_INIT_OFFSET;
396 	return 0;
397 }
398 
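/*
 * Move the boot CPU off the early boot stack: allocate a proper kernel
 * stack, make it current's stack and call rest_init() on it.
 */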
399 void __init arch_call_rest_init(void)
400 {
401 	unsigned long stack;
402 
403 	smp_reinit_ipl_cpu();
404 	stack = stack_alloc();
405 	if (!stack)
406 		panic("Couldn't allocate kernel stack");
407 	current->stack = (void *) stack;
408 #ifdef CONFIG_VMAP_STACK
409 	current->stack_vm_area = (void *) stack;
410 #endif
411 	set_task_stack_end_magic(current);
412 	stack += STACK_INIT_OFFSET;
413 	S390_lowcore.kernel_stack = stack;
414 	call_on_stack_noreturn(rest_init, stack);
415 }
416 
417 static void __init setup_lowcore(void)
418 {
419 	struct lowcore *lc, *abs_lc;
420 	unsigned long mcck_stack;
421 
422 	/*
423 	 * Setup lowcore for boot cpu
424 	 */
425 	BUILD_BUG_ON(sizeof(struct lowcore) != LC_PAGES * PAGE_SIZE);
426 	lc = memblock_alloc_low(sizeof(*lc), sizeof(*lc));
427 	if (!lc)
428 		panic("%s: Failed to allocate %zu bytes align=%zx\n",
429 		      __func__, sizeof(*lc), sizeof(*lc));
430 
431 	lc->restart_psw.mask = PSW_KERNEL_BITS & ~PSW_MASK_DAT;
432 	lc->restart_psw.addr = __pa(restart_int_handler);
433 	lc->external_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
434 	lc->external_new_psw.addr = (unsigned long) ext_int_handler;
435 	lc->svc_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
436 	lc->svc_new_psw.addr = (unsigned long) system_call;
437 	lc->program_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
438 	lc->program_new_psw.addr = (unsigned long) pgm_check_handler;
439 	lc->mcck_new_psw.mask = PSW_KERNEL_BITS;
440 	lc->mcck_new_psw.addr = (unsigned long) mcck_int_handler;
441 	lc->io_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
442 	lc->io_new_psw.addr = (unsigned long) io_int_handler;
443 	lc->clock_comparator = clock_comparator_max;
444 	lc->nodat_stack = ((unsigned long) &init_thread_union)
445 		+ THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
446 	lc->current_task = (unsigned long)&init_task;
447 	lc->lpp = LPP_MAGIC;
448 	lc->machine_flags = S390_lowcore.machine_flags;
449 	lc->preempt_count = S390_lowcore.preempt_count;
450 	nmi_alloc_mcesa_early(&lc->mcesad);
451 	lc->sys_enter_timer = S390_lowcore.sys_enter_timer;
452 	lc->exit_timer = S390_lowcore.exit_timer;
453 	lc->user_timer = S390_lowcore.user_timer;
454 	lc->system_timer = S390_lowcore.system_timer;
455 	lc->steal_timer = S390_lowcore.steal_timer;
456 	lc->last_update_timer = S390_lowcore.last_update_timer;
457 	lc->last_update_clock = S390_lowcore.last_update_clock;
458 
459 	/*
460 	 * Allocate the global restart stack which is the same for
461 	 * all CPUs in case *one* of them does a PSW restart.
462 	 */
463 	restart_stack = memblock_alloc(THREAD_SIZE, THREAD_SIZE);
464 	if (!restart_stack)
465 		panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
466 		      __func__, THREAD_SIZE, THREAD_SIZE);
467 	restart_stack += STACK_INIT_OFFSET;
468 
469 	/*
470 	 * Set up PSW restart to call ipl.c:do_restart(). Copy the relevant
471 	 * restart data to the absolute zero lowcore. This is necessary if
472 	 * PSW restart is done on an offline CPU that has lowcore zero.
473 	 */
474 	lc->restart_stack = (unsigned long) restart_stack;
475 	lc->restart_fn = (unsigned long) do_restart;
476 	lc->restart_data = 0;
477 	lc->restart_source = -1U;
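	/*
	 * Snapshot all control registers; they are copied to the absolute
	 * lowcore below so that a PSW restart can restore them.
	 */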
478 	__ctl_store(lc->cregs_save_area, 0, 15);
479 
480 	mcck_stack = (unsigned long)memblock_alloc(THREAD_SIZE, THREAD_SIZE);
481 	if (!mcck_stack)
482 		panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
483 		      __func__, THREAD_SIZE, THREAD_SIZE);
484 	lc->mcck_stack = mcck_stack + STACK_INIT_OFFSET;
485 
486 	lc->spinlock_lockval = arch_spin_lockval(0);
487 	lc->spinlock_index = 0;
488 	arch_spin_lock_setup(0);
489 	lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW);
490 	lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW);
491 	lc->preempt_count = PREEMPT_DISABLED;
492 	lc->kernel_asce = S390_lowcore.kernel_asce;
493 	lc->user_asce = S390_lowcore.user_asce;
494 
495 	abs_lc = get_abs_lowcore();
496 	abs_lc->restart_stack = lc->restart_stack;
497 	abs_lc->restart_fn = lc->restart_fn;
498 	abs_lc->restart_data = lc->restart_data;
499 	abs_lc->restart_source = lc->restart_source;
500 	abs_lc->restart_psw = lc->restart_psw;
501 	abs_lc->restart_flags = RESTART_FLAG_CTLREGS;
502 	memcpy(abs_lc->cregs_save_area, lc->cregs_save_area, sizeof(abs_lc->cregs_save_area));
503 	abs_lc->program_new_psw = lc->program_new_psw;
504 	abs_lc->mcesad = lc->mcesad;
505 	put_abs_lowcore(abs_lc);
506 
507 	set_prefix(__pa(lc));
508 	lowcore_ptr[0] = lc;
509 	if (abs_lowcore_map(0, lowcore_ptr[0], false))
510 		panic("Couldn't setup absolute lowcore");
511 }
512 
513 static struct resource code_resource = {
514 	.name  = "Kernel code",
515 	.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
516 };
517 
518 static struct resource data_resource = {
519 	.name = "Kernel data",
520 	.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
521 };
522 
523 static struct resource bss_resource = {
524 	.name = "Kernel bss",
525 	.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
526 };
527 
528 static struct resource __initdata *standard_resources[] = {
529 	&code_resource,
530 	&data_resource,
531 	&bss_resource,
532 };
533 
534 static void __init setup_resources(void)
535 {
536 	struct resource *res, *std_res, *sub_res;
537 	phys_addr_t start, end;
538 	int j;
539 	u64 i;
540 
541 	code_resource.start = (unsigned long) _text;
542 	code_resource.end = (unsigned long) _etext - 1;
543 	data_resource.start = (unsigned long) _etext;
544 	data_resource.end = (unsigned long) _edata - 1;
545 	bss_resource.start = (unsigned long) __bss_start;
546 	bss_resource.end = (unsigned long) __bss_stop - 1;
547 
548 	for_each_mem_range(i, &start, &end) {
549 		res = memblock_alloc(sizeof(*res), 8);
550 		if (!res)
551 			panic("%s: Failed to allocate %zu bytes align=0x%x\n",
552 			      __func__, sizeof(*res), 8);
553 		res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;
554 
555 		res->name = "System RAM";
556 		res->start = start;
557 		/*
558 		 * In memblock, end points to the first byte after the
559 		 * range, while in resources, end points to the last byte in
560 		 * the range.
561 		 */
562 		res->end = end - 1;
563 		request_resource(&iomem_resource, res);
564 
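		/*
		 * Insert kernel code/data/bss as child resources of the
		 * System RAM region they fall into; a standard resource
		 * spanning several RAM regions is split accordingly.
		 */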
565 		for (j = 0; j < ARRAY_SIZE(standard_resources); j++) {
566 			std_res = standard_resources[j];
567 			if (std_res->start < res->start ||
568 			    std_res->start > res->end)
569 				continue;
570 			if (std_res->end > res->end) {
571 				sub_res = memblock_alloc(sizeof(*sub_res), 8);
572 				if (!sub_res)
573 					panic("%s: Failed to allocate %zu bytes align=0x%x\n",
574 					      __func__, sizeof(*sub_res), 8);
575 				*sub_res = *std_res;
576 				sub_res->end = res->end;
577 				std_res->start = res->end + 1;
578 				request_resource(res, sub_res);
579 			} else {
580 				request_resource(res, std_res);
581 			}
582 		}
583 	}
584 #ifdef CONFIG_CRASH_DUMP
585 	/*
586 	 * Re-add removed crash kernel memory as reserved memory. This makes
587 	 * sure it will be mapped with the identity mapping and struct pages
588 	 * will be created, so it can be resized later on.
589 	 * However add it later since the crash kernel resource should not be
590 	 * part of the System RAM resource.
591 	 */
592 	if (crashk_res.end) {
593 		memblock_add_node(crashk_res.start, resource_size(&crashk_res),
594 				  0, MEMBLOCK_NONE);
595 		memblock_reserve(crashk_res.start, resource_size(&crashk_res));
596 		insert_resource(&iomem_resource, &crashk_res);
597 	}
598 #endif
599 }
600 
601 static void __init setup_memory_end(void)
602 {
603 	max_pfn = max_low_pfn = PFN_DOWN(ident_map_size);
604 	pr_notice("The maximum memory size is %luMB\n", ident_map_size >> 20);
605 }
606 
607 #ifdef CONFIG_CRASH_DUMP
608 
609 /*
610  * When kdump is enabled, we have to ensure that no memory from the area
611  * [0 - crashkernel memory size] is set offline - it will be exchanged with
612  * the crashkernel memory region when kdump is triggered. The crashkernel
613  * memory region can never get offlined (pages are unmovable).
614  */
615 static int kdump_mem_notifier(struct notifier_block *nb,
616 			      unsigned long action, void *data)
617 {
618 	struct memory_notify *arg = data;
619 
620 	if (action != MEM_GOING_OFFLINE)
621 		return NOTIFY_OK;
622 	if (arg->start_pfn < PFN_DOWN(resource_size(&crashk_res)))
623 		return NOTIFY_BAD;
624 	return NOTIFY_OK;
625 }
626 
627 static struct notifier_block kdump_mem_nb = {
628 	.notifier_call = kdump_mem_notifier,
629 };
630 
631 #endif
632 
633 /*
634  * Reserve page tables created by decompressor
635  */
636 static void __init reserve_pgtables(void)
637 {
638 	memblock_reserve(pgalloc_pos, pgalloc_end - pgalloc_pos);
639 }
640 
641 /*
642  * Reserve memory for kdump kernel to be loaded with kexec
643  */
644 static void __init reserve_crashkernel(void)
645 {
646 #ifdef CONFIG_CRASH_DUMP
647 	unsigned long long crash_base, crash_size;
648 	phys_addr_t low, high;
649 	int rc;
650 
651 	rc = parse_crashkernel(boot_command_line, ident_map_size, &crash_size,
652 			       &crash_base);
653 
654 	crash_base = ALIGN(crash_base, KEXEC_CRASH_MEM_ALIGN);
655 	crash_size = ALIGN(crash_size, KEXEC_CRASH_MEM_ALIGN);
656 	if (rc || crash_size == 0)
657 		return;
658 
659 	if (memblock.memory.regions[0].size < crash_size) {
660 		pr_info("crashkernel reservation failed: %s\n",
661 			"first memory chunk must be at least crashkernel size");
662 		return;
663 	}
664 
665 	low = crash_base ?: oldmem_data.start;
666 	high = low + crash_size;
667 	if (low >= oldmem_data.start && high <= oldmem_data.start + oldmem_data.size) {
668 		/* The crashkernel fits into OLDMEM, reuse OLDMEM */
669 		crash_base = low;
670 	} else {
671 		/* Find suitable area in free memory */
672 		low = max_t(unsigned long, crash_size, sclp.hsa_size);
673 		high = crash_base ? crash_base + crash_size : ULONG_MAX;
674 
675 		if (crash_base && crash_base < low) {
676 			pr_info("crashkernel reservation failed: %s\n",
677 				"crash_base too low");
678 			return;
679 		}
680 		low = crash_base ?: low;
681 		crash_base = memblock_phys_alloc_range(crash_size,
682 						       KEXEC_CRASH_MEM_ALIGN,
683 						       low, high);
684 	}
685 
686 	if (!crash_base) {
687 		pr_info("crashkernel reservation failed: %s\n",
688 			"no suitable area found");
689 		return;
690 	}
691 
692 	if (register_memory_notifier(&kdump_mem_nb)) {
693 		memblock_phys_free(crash_base, crash_size);
694 		return;
695 	}
696 
697 	if (!oldmem_data.start && MACHINE_IS_VM)
698 		diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size));
699 	crashk_res.start = crash_base;
700 	crashk_res.end = crash_base + crash_size - 1;
701 	memblock_remove(crash_base, crash_size);
702 	pr_info("Reserving %lluMB of memory at %lluMB "
703 		"for crashkernel (System RAM: %luMB)\n",
704 		crash_size >> 20, crash_base >> 20,
705 		(unsigned long)memblock.memory.total_size >> 20);
706 	os_info_crashkernel_add(crash_base, crash_size);
707 #endif
708 }
709 
710 /*
711  * Reserve the initrd from being used by memblock
712  */
713 static void __init reserve_initrd(void)
714 {
715 #ifdef CONFIG_BLK_DEV_INITRD
716 	if (!initrd_data.start || !initrd_data.size)
717 		return;
718 	initrd_start = (unsigned long)__va(initrd_data.start);
719 	initrd_end = initrd_start + initrd_data.size;
720 	memblock_reserve(initrd_data.start, initrd_data.size);
721 #endif
722 }
723 
724 /*
725  * Reserve the memory area used to pass the certificate lists
726  */
727 static void __init reserve_certificate_list(void)
728 {
729 	if (ipl_cert_list_addr)
730 		memblock_reserve(ipl_cert_list_addr, ipl_cert_list_size);
731 }
732 
733 static void __init reserve_mem_detect_info(void)
734 {
735 	unsigned long start, size;
736 
737 	get_mem_detect_reserved(&start, &size);
738 	if (size)
739 		memblock_reserve(start, size);
740 }
741 
742 static void __init free_mem_detect_info(void)
743 {
744 	unsigned long start, size;
745 
746 	get_mem_detect_reserved(&start, &size);
747 	if (size)
748 		memblock_phys_free(start, size);
749 }
750 
751 static const char * __init get_mem_info_source(void)
752 {
753 	switch (mem_detect.info_source) {
754 	case MEM_DETECT_SCLP_STOR_INFO:
755 		return "sclp storage info";
756 	case MEM_DETECT_DIAG260:
757 		return "diag260";
758 	case MEM_DETECT_SCLP_READ_INFO:
759 		return "sclp read info";
760 	case MEM_DETECT_BIN_SEARCH:
761 		return "binary search";
762 	}
763 	return "none";
764 }
765 
766 static void __init memblock_add_mem_detect_info(void)
767 {
768 	unsigned long start, end;
769 	int i;
770 
771 	pr_debug("physmem info source: %s (%hhd)\n",
772 		 get_mem_info_source(), mem_detect.info_source);
773 	/* keep memblock lists close to the kernel */
774 	memblock_set_bottom_up(true);
775 	for_each_mem_detect_usable_block(i, &start, &end)
776 		memblock_add(start, end - start);
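	/* keep *all* detected ranges, not only the usable ones, in the physmem list */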
777 	for_each_mem_detect_block(i, &start, &end)
778 		memblock_physmem_add(start, end - start);
779 	memblock_set_bottom_up(false);
780 	memblock_set_node(0, ULONG_MAX, &memblock.memory, 0);
781 }
782 
783 /*
784  * Check for initrd being in usable memory
785  */
786 static void __init check_initrd(void)
787 {
788 #ifdef CONFIG_BLK_DEV_INITRD
789 	if (initrd_data.start && initrd_data.size &&
790 	    !memblock_is_region_memory(initrd_data.start, initrd_data.size)) {
791 		pr_err("The initial RAM disk does not fit into the memory\n");
792 		memblock_phys_free(initrd_data.start, initrd_data.size);
793 		initrd_start = initrd_end = 0;
794 	}
795 #endif
796 }
797 
798 /*
799  * Reserve memory used for lowcore/command line/kernel image.
800  */
801 static void __init reserve_kernel(void)
802 {
803 	memblock_reserve(0, STARTUP_NORMAL_OFFSET);
804 	memblock_reserve(OLDMEM_BASE, sizeof(unsigned long));
805 	memblock_reserve(OLDMEM_SIZE, sizeof(unsigned long));
806 	memblock_reserve(__amode31_base, __eamode31 - __samode31);
807 	memblock_reserve(__pa(sclp_early_sccb), EXT_SCCB_READ_SCP);
808 	memblock_reserve(__pa(_stext), _end - _stext);
809 }
810 
811 static void __init setup_memory(void)
812 {
813 	phys_addr_t start, end;
814 	u64 i;
815 
816 	/*
817 	 * Init storage key for present memory
818 	 */
819 	for_each_mem_range(i, &start, &end)
820 		storage_key_init_range(start, end);
821 
822 	psw_set_key(PAGE_DEFAULT_KEY);
823 }
824 
825 static void __init relocate_amode31_section(void)
826 {
827 	unsigned long amode31_size = __eamode31 - __samode31;
828 	long amode31_offset = __amode31_base - __samode31;
829 	long *ptr;
830 
831 	pr_info("Relocating AMODE31 section of size 0x%08lx\n", amode31_size);
832 
833 	/* Move original AMODE31 section to the new one */
834 	memmove((void *)__amode31_base, (void *)__samode31, amode31_size);
835 	/* Zero out the old AMODE31 section to catch invalid accesses within it */
836 	memset((void *)__samode31, 0, amode31_size);
837 
838 	/* Update all AMODE31 region references */
839 	for (ptr = _start_amode31_refs; ptr != _end_amode31_refs; ptr++)
840 		*ptr += amode31_offset;
841 }
842 
843 /* This must be called after AMODE31 relocation */
844 static void __init setup_cr(void)
845 {
846 	union ctlreg2 cr2;
847 	union ctlreg5 cr5;
848 	union ctlreg15 cr15;
849 
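	/* Link the DUCT to the ASTE (words 1 and 2) and to the DUALD (word 4) */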
850 	__ctl_duct[1] = (unsigned long)__ctl_aste;
851 	__ctl_duct[2] = (unsigned long)__ctl_aste;
852 	__ctl_duct[4] = (unsigned long)__ctl_duald;
853 
854 	/* Update control registers CR2, CR5 and CR15 */
855 	__ctl_store(cr2.val, 2, 2);
856 	__ctl_store(cr5.val, 5, 5);
857 	__ctl_store(cr15.val, 15, 15);
858 	cr2.ducto = (unsigned long)__ctl_duct >> 6;
859 	cr5.pasteo = (unsigned long)__ctl_duct >> 6;
860 	cr15.lsea = (unsigned long)__ctl_linkage_stack >> 3;
861 	__ctl_load(cr2.val, 2, 2);
862 	__ctl_load(cr5.val, 5, 5);
863 	__ctl_load(cr15.val, 15, 15);
864 }
865 
866 /*
867  * Add system information as device randomness
868  */
869 static void __init setup_randomness(void)
870 {
871 	struct sysinfo_3_2_2 *vmms;
872 
873 	vmms = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
874 	if (!vmms)
875 		panic("Failed to allocate memory for sysinfo structure\n");
876 	if (stsi(vmms, 3, 2, 2) == 0 && vmms->count)
877 		add_device_randomness(&vmms->vm, sizeof(vmms->vm[0]) * vmms->count);
878 	memblock_free(vmms, PAGE_SIZE);
879 
880 	if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG))
881 		static_branch_enable(&s390_arch_random_available);
882 }
883 
884 /*
885  * Find the correct size for the task_struct. This depends on
886  * the size of the struct fpu at the end of the thread_struct
887  * which is embedded in the task_struct.
888  */
889 static void __init setup_task_size(void)
890 {
891 	int task_size = sizeof(struct task_struct);
892 
893 	if (!MACHINE_HAS_VX) {
894 		task_size -= sizeof(__vector128) * __NUM_VXRS;
895 		task_size += sizeof(freg_t) * __NUM_FPRS;
896 	}
897 	arch_task_struct_size = task_size;
898 }
899 
900 /*
901  * Issue diagnose 318 to set the control program name and
902  * version codes.
903  */
904 static void __init setup_control_program_code(void)
905 {
906 	union diag318_info diag318_info = {
907 		.cpnc = CPNC_LINUX,
908 		.cpvc = 0,
909 	};
910 
911 	if (!sclp.has_diag318)
912 		return;
913 
914 	diag_stat_inc(DIAG_STAT_X318);
915 	asm volatile("diag %0,0,0x318\n" : : "d" (diag318_info.val));
916 }
917 
918 /*
919  * Print the component list from the IPL report
920  */
921 static void __init log_component_list(void)
922 {
923 	struct ipl_rb_component_entry *ptr, *end;
924 	char *str;
925 
926 	if (!early_ipl_comp_list_addr)
927 		return;
928 	if (ipl_block.hdr.flags & IPL_PL_FLAG_SIPL)
929 		pr_info("Linux is running with Secure-IPL enabled\n");
930 	else
931 		pr_info("Linux is running with Secure-IPL disabled\n");
932 	ptr = (void *) early_ipl_comp_list_addr;
933 	end = (void *) ptr + early_ipl_comp_list_size;
934 	pr_info("The IPL report contains the following components:\n");
935 	while (ptr < end) {
936 		if (ptr->flags & IPL_RB_COMPONENT_FLAG_SIGNED) {
937 			if (ptr->flags & IPL_RB_COMPONENT_FLAG_VERIFIED)
938 				str = "signed, verified";
939 			else
940 				str = "signed, verification failed";
941 		} else {
942 			str = "not signed";
943 		}
944 		pr_info("%016llx - %016llx (%s)\n",
945 			ptr->addr, ptr->addr + ptr->len, str);
946 		ptr++;
947 	}
948 }
949 
950 /*
951  * Setup function called from init/main.c just after the banner
952  * was printed.
953  */
954 
955 void __init setup_arch(char **cmdline_p)
956 {
957 	/*
958 	 * Print what head.S has found out about the machine.
959 	 */
960 	if (MACHINE_IS_VM)
961 		pr_info("Linux is running as a z/VM "
962 			"guest operating system in 64-bit mode\n");
963 	else if (MACHINE_IS_KVM)
964 		pr_info("Linux is running under KVM in 64-bit mode\n");
965 	else if (MACHINE_IS_LPAR)
966 		pr_info("Linux is running natively in 64-bit mode\n");
967 	else
968 		pr_info("Linux is running as a guest in 64-bit mode\n");
969 
970 	log_component_list();
971 
972 	/* Have one command line that is parsed and saved in /proc/cmdline */
973 	/* boot_command_line has been already set up in early.c */
974 	*cmdline_p = boot_command_line;
975 
976 	ROOT_DEV = Root_RAM0;
977 
978 	setup_initial_init_mm(_text, _etext, _edata, _end);
979 
980 	if (IS_ENABLED(CONFIG_EXPOLINE_AUTO))
981 		nospec_auto_detect();
982 
983 	jump_label_init();
984 	parse_early_param();
985 #ifdef CONFIG_CRASH_DUMP
986 	/* Deactivate elfcorehdr= kernel parameter */
987 	elfcorehdr_addr = ELFCORE_ADDR_MAX;
988 #endif
989 
990 	os_info_init();
991 	setup_ipl();
992 	setup_task_size();
993 	setup_control_program_code();
994 
995 	/* Do some memory reservations *before* memory is added to memblock */
996 	reserve_pgtables();
997 	reserve_kernel();
998 	reserve_initrd();
999 	reserve_certificate_list();
1000 	reserve_mem_detect_info();
1001 	memblock_set_current_limit(ident_map_size);
1002 	memblock_allow_resize();
1003 
1004 	/* Get information about *all* installed memory */
1005 	memblock_add_mem_detect_info();
1006 
1007 	free_mem_detect_info();
1008 	setup_memory_end();
1009 	memblock_dump_all();
1010 	setup_memory();
1011 
1012 	relocate_amode31_section();
1013 	setup_cr();
1014 	setup_uv();
1015 	dma_contiguous_reserve(ident_map_size);
1016 	vmcp_cma_reserve();
1017 	if (MACHINE_HAS_EDAT2)
1018 		hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT);
1019 
1020 	check_initrd();
1021 	reserve_crashkernel();
1022 #ifdef CONFIG_CRASH_DUMP
1023 	/*
1024 	 * Be aware that smp_save_dump_secondary_cpus() triggers a system reset.
1025 	 * Therefore CPU and device initialization should be done afterwards.
1026 	 */
1027 	smp_save_dump_secondary_cpus();
1028 #endif
1029 
1030 	setup_resources();
1031 	setup_lowcore();
1032 	smp_fill_possible_mask();
1033 	cpu_detect_mhz_feature();
1034 	cpu_init();
1035 	numa_setup();
1036 	smp_detect_cpus();
1037 	topology_init_early();
1038 
1039 	if (test_facility(193))
1040 		static_branch_enable(&cpu_has_bear);
1041 
1042 	/*
1043 	 * Create kernel page tables.
1044 	 */
1045 	paging_init();
1046 
1047 	/*
1048 	 * After paging_init created the kernel page table, the new PSWs
1049 	 * in lowcore can now run with DAT enabled.
1050 	 */
1051 #ifdef CONFIG_CRASH_DUMP
1052 	smp_save_dump_ipl_cpu();
1053 #endif
1054 
1055 	/* Setup default console */
1056 	conmode_default();
1057 	set_preferred_console();
1058 
1059 	apply_alternative_instructions();
1060 	if (IS_ENABLED(CONFIG_EXPOLINE))
1061 		nospec_init_branches();
1062 
1063 	/* Setup zfcp/nvme dump support */
1064 	setup_zfcpdump();
1065 
1066 	/* Add system specific data to the random pool */
1067 	setup_randomness();
1068 }
1069