// SPDX-License-Identifier: GPL-2.0
#include <linux/string.h>
#include <linux/elf.h>
#include <asm/boot_data.h>
#include <asm/sections.h>
#include <asm/cpu_mf.h>
#include <asm/setup.h>
#include <asm/kasan.h>
#include <asm/kexec.h>
#include <asm/sclp.h>
#include <asm/diag.h>
#include <asm/uv.h>
#include "compressed/decompressor.h"
#include "boot.h"

extern char __boot_data_start[], __boot_data_end[];
extern char __boot_data_preserved_start[], __boot_data_preserved_end[];
unsigned long __bootdata_preserved(__kaslr_offset);
unsigned long __bootdata_preserved(VMALLOC_START);
unsigned long __bootdata_preserved(VMALLOC_END);
struct page *__bootdata_preserved(vmemmap);
unsigned long __bootdata_preserved(vmemmap_size);
unsigned long __bootdata_preserved(MODULES_VADDR);
unsigned long __bootdata_preserved(MODULES_END);
unsigned long __bootdata(ident_map_size);
int __bootdata(is_full_image) = 1;

u64 __bootdata_preserved(stfle_fac_list[16]);
u64 __bootdata_preserved(alt_stfle_fac_list[16]);

/*
 * Some code and data need to stay below 2 GB, even when the kernel is
 * relocated above 2 GB, because they have to use 31-bit addresses.
 * Such code and data are part of the .dma section; their location is passed
 * to the decompressed / relocated kernel via the .boot.preserved.data
 * section.
 */
extern char _sdma[], _edma[];
extern char _stext_dma[], _etext_dma[];
extern struct exception_table_entry _start_dma_ex_table[];
extern struct exception_table_entry _stop_dma_ex_table[];
unsigned long __bootdata_preserved(__sdma) = __pa(&_sdma);
unsigned long __bootdata_preserved(__edma) = __pa(&_edma);
unsigned long __bootdata_preserved(__stext_dma) = __pa(&_stext_dma);
unsigned long __bootdata_preserved(__etext_dma) = __pa(&_etext_dma);
struct exception_table_entry *
	__bootdata_preserved(__start_dma_ex_table) = _start_dma_ex_table;
struct exception_table_entry *
	__bootdata_preserved(__stop_dma_ex_table) = _stop_dma_ex_table;

int _diag210_dma(struct diag210 *addr);
int _diag26c_dma(void *req, void *resp, enum diag26c_sc subcode);
int _diag14_dma(unsigned long rx, unsigned long ry1, unsigned long subcode);
void _diag0c_dma(struct hypfs_diag0c_entry *entry);
void _diag308_reset_dma(void);
struct diag_ops __bootdata_preserved(diag_dma_ops) = {
	.diag210 = _diag210_dma,
	.diag26c = _diag26c_dma,
	.diag14 = _diag14_dma,
	.diag0c = _diag0c_dma,
	.diag308_reset = _diag308_reset_dma
};
static struct diag210 _diag210_tmp_dma __section(".dma.data");
struct diag210 *__bootdata_preserved(__diag210_tmp_dma) = &_diag210_tmp_dma;

void error(char *x)
{
	sclp_early_printk("\n\n");
	sclp_early_printk(x);
	sclp_early_printk("\n\n -- System halted");

	disabled_wait();
}

static void setup_lpp(void)
{
	S390_lowcore.current_pid = 0;
	S390_lowcore.lpp = LPP_MAGIC;
	if (test_facility(40))
		lpp(&S390_lowcore.lpp);
}

#ifdef CONFIG_KERNEL_UNCOMPRESSED
unsigned long mem_safe_offset(void)
{
	return vmlinux.default_lma + vmlinux.image_size + vmlinux.bss_size;
}
#endif

static void rescue_initrd(unsigned long addr)
{
	if (!IS_ENABLED(CONFIG_BLK_DEV_INITRD))
		return;
	if (!INITRD_START || !INITRD_SIZE)
		return;
	if (addr <= INITRD_START)
		return;
	memmove((void *)addr, (void *)INITRD_START, INITRD_SIZE);
	INITRD_START = addr;
}
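
/*
 * Copy the .boot.data and .boot.preserved.data sections of the decompressor
 * over to the decompressed kernel, at the offsets recorded in
 * vmlinux.bootdata_off and vmlinux.bootdata_preserved_off (which
 * startup_kernel() has already adjusted for a possible KASLR shift).
 * The size checks guard against a decompressor built against a different
 * vmlinux layout.
 */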
static void copy_bootdata(void)
{
	if (__boot_data_end - __boot_data_start != vmlinux.bootdata_size)
		error(".boot.data section size mismatch");
	memcpy((void *)vmlinux.bootdata_off, __boot_data_start, vmlinux.bootdata_size);
	if (__boot_data_preserved_end - __boot_data_preserved_start != vmlinux.bootdata_preserved_size)
		error(".boot.preserved.data section size mismatch");
	memcpy((void *)vmlinux.bootdata_preserved_off, __boot_data_preserved_start, vmlinux.bootdata_preserved_size);
}
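
/*
 * Apply the relocations from the .rela.dyn section of the decompressed
 * kernel, shifting every target location by the KASLR offset. For the
 * common R_390_RELATIVE case (symbol index STN_UNDEF) the value placed at
 * r_offset + offset is r_addend + offset; entries that refer to a defined
 * dynamic symbol use st_value + r_addend + offset instead. The actual store
 * is performed by arch_kexec_do_relocs(), which fails for relocation types
 * it does not know.
 */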
static void handle_relocs(unsigned long offset)
{
	Elf64_Rela *rela_start, *rela_end, *rela;
	int r_type, r_sym, rc;
	Elf64_Addr loc, val;
	Elf64_Sym *dynsym;

	rela_start = (Elf64_Rela *) vmlinux.rela_dyn_start;
	rela_end = (Elf64_Rela *) vmlinux.rela_dyn_end;
	dynsym = (Elf64_Sym *) vmlinux.dynsym_start;
	for (rela = rela_start; rela < rela_end; rela++) {
		loc = rela->r_offset + offset;
		val = rela->r_addend;
		r_sym = ELF64_R_SYM(rela->r_info);
		if (r_sym) {
			if (dynsym[r_sym].st_shndx != SHN_UNDEF)
				val += dynsym[r_sym].st_value + offset;
		} else {
			/*
			 * 0 == undefined symbol table index (STN_UNDEF),
			 * used for R_390_RELATIVE; only add the KASLR offset
			 */
			val += offset;
		}
		r_type = ELF64_R_TYPE(rela->r_info);
		rc = arch_kexec_do_relocs(r_type, (void *) loc, val, 0);
		if (rc)
			error("Unknown relocation type");
	}
}

/*
 * Merge information from several sources into a single ident_map_size value.
 * "ident_map_size" represents the upper limit of physical memory we may ever
 * reach. Besides online memory it may also include standby (offline) memory.
 * "ident_map_size" can be lower than the standby or even the online memory
 * actually present, due to limiting factors. We should never go above this
 * limit. It is the size of our identity mapping.
 *
 * Consider the following factors:
 * 1. max_physmem_end - end of physical memory online or standby.
 *    Always <= end of the last online memory block (get_mem_detect_end()).
 * 2. CONFIG_MAX_PHYSMEM_BITS - the maximum size of physical memory the
 *    kernel is able to support.
 * 3. "mem=" kernel command line option which limits physical memory usage.
 * 4. OLDMEM_BASE which is a kdump memory limit when the kernel is executed as
 *    a crash kernel.
 * 5. "hsa" size which is a memory limit when the kernel is executed during
 *    zfcp/nvme dump.
 */
static void setup_ident_map_size(unsigned long max_physmem_end)
{
	unsigned long hsa_size;

	ident_map_size = max_physmem_end;
	if (memory_limit)
		ident_map_size = min(ident_map_size, memory_limit);
	ident_map_size = min(ident_map_size, 1UL << MAX_PHYSMEM_BITS);

#ifdef CONFIG_CRASH_DUMP
	if (OLDMEM_BASE) {
		kaslr_enabled = 0;
		ident_map_size = min(ident_map_size, OLDMEM_SIZE);
	} else if (ipl_block_valid && is_ipl_block_dump()) {
		kaslr_enabled = 0;
		if (!sclp_early_get_hsa_size(&hsa_size) && hsa_size)
			ident_map_size = min(ident_map_size, hsa_size);
	}
#endif
}
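
/*
 * Decide on the kernel virtual address space layout and pick between three
 * page table levels (vmax = _REGION2_SIZE) and four levels
 * (vmax = _REGION1_SIZE): three are enough when the identity map, vmemmap,
 * the vmalloc area and the module area all fit below _REGION2_SIZE, while
 * KASAN always forces four. The module area (MODULES_LEN bytes) goes at the
 * top of the usable space, the vmalloc area directly below it, and vmemmap
 * and the identity mapping below that. The resulting MODULES_*, VMALLOC_*
 * and vmemmap values are handed to the decompressed kernel via the
 * .boot.preserved.data section.
 */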
static void setup_kernel_memory_layout(void)
{
	bool vmalloc_size_verified = false;
	unsigned long vmemmap_off;
	unsigned long vspace_left;
	unsigned long rte_size;
	unsigned long pages;
	unsigned long vmax;

	pages = ident_map_size / PAGE_SIZE;
	/* vmemmap contains a multiple of PAGES_PER_SECTION struct pages */
	vmemmap_size = SECTION_ALIGN_UP(pages) * sizeof(struct page);

	/* choose kernel address space layout: 4 or 3 levels. */
	vmemmap_off = round_up(ident_map_size, _REGION3_SIZE);
	if (IS_ENABLED(CONFIG_KASAN) ||
	    vmalloc_size > _REGION2_SIZE ||
	    vmemmap_off + vmemmap_size + vmalloc_size + MODULES_LEN > _REGION2_SIZE)
		vmax = _REGION1_SIZE;
	else
		vmax = _REGION2_SIZE;

	/* keep vmemmap_off aligned to a top level region table entry */
	rte_size = vmax == _REGION1_SIZE ? _REGION2_SIZE : _REGION3_SIZE;
	MODULES_END = vmax;
	if (is_prot_virt_host()) {
		/*
		 * Force modules and the vmalloc area below the ultravisor
		 * secure storage limit, so that any vmalloc allocation
		 * we do can be used to back secure guest storage.
		 */
		adjust_to_uv_max(&MODULES_END);
	}

#ifdef CONFIG_KASAN
	if (MODULES_END < vmax) {
		/* force vmalloc and modules below the kasan shadow */
		MODULES_END = min(MODULES_END, KASAN_SHADOW_START);
	} else {
		/*
		 * leave vmalloc and modules above the kasan shadow, but make
		 * sure they don't overlap with it
		 */
		vmalloc_size = min(vmalloc_size, vmax - KASAN_SHADOW_END - MODULES_LEN);
		vmalloc_size_verified = true;
		vspace_left = KASAN_SHADOW_START;
	}
#endif
	MODULES_VADDR = MODULES_END - MODULES_LEN;
	VMALLOC_END = MODULES_VADDR;

	if (vmalloc_size_verified) {
		VMALLOC_START = VMALLOC_END - vmalloc_size;
	} else {
		vmemmap_off = round_up(ident_map_size, rte_size);

		if (vmemmap_off + vmemmap_size > VMALLOC_END ||
		    vmalloc_size > VMALLOC_END - vmemmap_off - vmemmap_size) {
			/*
			 * Allow the vmalloc area to occupy up to half of
			 * the remaining virtual space.
			 */
			vmalloc_size = min(vmalloc_size, VMALLOC_END / 2);
		}
		VMALLOC_START = VMALLOC_END - vmalloc_size;
		vspace_left = VMALLOC_START;
	}

	pages = vspace_left / (PAGE_SIZE + sizeof(struct page));
	pages = SECTION_ALIGN_UP(pages);
	vmemmap_off = round_up(vspace_left - pages * sizeof(struct page), rte_size);
	/* keep vmemmap leftmost, starting at a fresh region table entry */
	vmemmap_off = min(vmemmap_off, round_up(ident_map_size, rte_size));
	/* make sure the identity map ends below vmemmap */
	ident_map_size = min(ident_map_size, vmemmap_off);
	vmemmap_size = SECTION_ALIGN_UP(ident_map_size / PAGE_SIZE) * sizeof(struct page);
	VMALLOC_START = max(vmemmap_off + vmemmap_size, VMALLOC_START);
	vmemmap = (struct page *)vmemmap_off;
}

/*
 * Clear the BSS section of the decompressed Linux kernel, NOT the
 * decompressor's.
 */
static void clear_bss_section(void)
{
	memset((void *)vmlinux.default_lma + vmlinux.image_size, 0, vmlinux.bss_size);
}

/*
 * Set the vmalloc area size to one eighth of the (potential) physical memory
 * size, unless it has been set on the kernel command line.
 */
static void setup_vmalloc_size(void)
{
	unsigned long size;

	if (vmalloc_size_set)
		return;
	size = round_up(ident_map_size / 8, _SEGMENT_SIZE);
	vmalloc_size = max(size, vmalloc_size);
}

void startup_kernel(void)
{
	unsigned long random_lma;
	unsigned long safe_addr;
	void *img;

	setup_lpp();
	store_ipl_parmblock();
	safe_addr = mem_safe_offset();
	safe_addr = read_ipl_report(safe_addr);
	uv_query_info();
	rescue_initrd(safe_addr);
	sclp_early_read_info();
	setup_boot_command_line();
	parse_boot_command_line();
	setup_ident_map_size(detect_memory());
	setup_vmalloc_size();
	setup_kernel_memory_layout();

	random_lma = __kaslr_offset = 0;
	if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_enabled) {
		random_lma = get_random_base(safe_addr);
		if (random_lma) {
			__kaslr_offset = random_lma - vmlinux.default_lma;
			img = (void *)vmlinux.default_lma;
			vmlinux.default_lma += __kaslr_offset;
			vmlinux.entry += __kaslr_offset;
			vmlinux.bootdata_off += __kaslr_offset;
			vmlinux.bootdata_preserved_off += __kaslr_offset;
			vmlinux.rela_dyn_start += __kaslr_offset;
			vmlinux.rela_dyn_end += __kaslr_offset;
			vmlinux.dynsym_start += __kaslr_offset;
		}
	}

	if (!IS_ENABLED(CONFIG_KERNEL_UNCOMPRESSED)) {
		img = decompress_kernel();
		memmove((void *)vmlinux.default_lma, img, vmlinux.image_size);
	} else if (__kaslr_offset)
		memcpy((void *)vmlinux.default_lma, img, vmlinux.image_size);

	clear_bss_section();
	copy_bootdata();
	if (IS_ENABLED(CONFIG_RELOCATABLE))
		handle_relocs(__kaslr_offset);

	if (__kaslr_offset) {
		/*
		 * Save the KASLR offset for early dumps, before vmcore_info
		 * is set. Mark it as odd to distinguish it from a real
		 * vmcore_info pointer.
		 */
		S390_lowcore.vmcore_info = __kaslr_offset | 0x1UL;
		/* Clear the non-relocated kernel image */
		if (IS_ENABLED(CONFIG_KERNEL_UNCOMPRESSED))
			memset(img, 0, vmlinux.image_size);
	}
	vmlinux.entry();
}