1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Load ELF vmlinux file for the kexec_file_load syscall. 4 * 5 * Copyright (C) 2021 Huawei Technologies Co, Ltd. 6 * 7 * Author: Liao Chang (liaochang1@huawei.com) 8 * 9 * Based on kexec-tools' kexec-elf-riscv.c, heavily modified 10 * for kernel. 11 */ 12 13 #define pr_fmt(fmt) "kexec_image: " fmt 14 15 #include <linux/elf.h> 16 #include <linux/kexec.h> 17 #include <linux/slab.h> 18 #include <linux/of.h> 19 #include <linux/libfdt.h> 20 #include <linux/types.h> 21 #include <linux/memblock.h> 22 #include <asm/setup.h> 23 24 int arch_kimage_file_post_load_cleanup(struct kimage *image) 25 { 26 kvfree(image->arch.fdt); 27 image->arch.fdt = NULL; 28 29 vfree(image->elf_headers); 30 image->elf_headers = NULL; 31 image->elf_headers_sz = 0; 32 33 return kexec_image_post_load_cleanup_default(image); 34 } 35 36 static int riscv_kexec_elf_load(struct kimage *image, struct elfhdr *ehdr, 37 struct kexec_elf_info *elf_info, unsigned long old_pbase, 38 unsigned long new_pbase) 39 { 40 int i; 41 int ret = 0; 42 size_t size; 43 struct kexec_buf kbuf; 44 const struct elf_phdr *phdr; 45 46 kbuf.image = image; 47 48 for (i = 0; i < ehdr->e_phnum; i++) { 49 phdr = &elf_info->proghdrs[i]; 50 if (phdr->p_type != PT_LOAD) 51 continue; 52 53 size = phdr->p_filesz; 54 if (size > phdr->p_memsz) 55 size = phdr->p_memsz; 56 57 kbuf.buffer = (void *) elf_info->buffer + phdr->p_offset; 58 kbuf.bufsz = size; 59 kbuf.buf_align = phdr->p_align; 60 kbuf.mem = phdr->p_paddr - old_pbase + new_pbase; 61 kbuf.memsz = phdr->p_memsz; 62 kbuf.top_down = false; 63 ret = kexec_add_buffer(&kbuf); 64 if (ret) 65 break; 66 } 67 68 return ret; 69 } 70 71 /* 72 * Go through the available phsyical memory regions and find one that hold 73 * an image of the specified size. 74 */ 75 static int elf_find_pbase(struct kimage *image, unsigned long kernel_len, 76 struct elfhdr *ehdr, struct kexec_elf_info *elf_info, 77 unsigned long *old_pbase, unsigned long *new_pbase) 78 { 79 int i; 80 int ret; 81 struct kexec_buf kbuf; 82 const struct elf_phdr *phdr; 83 unsigned long lowest_paddr = ULONG_MAX; 84 unsigned long lowest_vaddr = ULONG_MAX; 85 86 for (i = 0; i < ehdr->e_phnum; i++) { 87 phdr = &elf_info->proghdrs[i]; 88 if (phdr->p_type != PT_LOAD) 89 continue; 90 91 if (lowest_paddr > phdr->p_paddr) 92 lowest_paddr = phdr->p_paddr; 93 94 if (lowest_vaddr > phdr->p_vaddr) 95 lowest_vaddr = phdr->p_vaddr; 96 } 97 98 kbuf.image = image; 99 kbuf.buf_min = lowest_paddr; 100 kbuf.buf_max = ULONG_MAX; 101 102 /* 103 * Current riscv boot protocol requires 2MB alignment for 104 * RV64 and 4MB alignment for RV32 105 * 106 */ 107 kbuf.buf_align = PMD_SIZE; 108 kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; 109 kbuf.memsz = ALIGN(kernel_len, PAGE_SIZE); 110 kbuf.top_down = false; 111 ret = arch_kexec_locate_mem_hole(&kbuf); 112 if (!ret) { 113 *old_pbase = lowest_paddr; 114 *new_pbase = kbuf.mem; 115 image->start = ehdr->e_entry - lowest_vaddr + kbuf.mem; 116 } 117 return ret; 118 } 119 120 #ifdef CONFIG_CRASH_DUMP 121 static int get_nr_ram_ranges_callback(struct resource *res, void *arg) 122 { 123 unsigned int *nr_ranges = arg; 124 125 (*nr_ranges)++; 126 return 0; 127 } 128 129 static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg) 130 { 131 struct crash_mem *cmem = arg; 132 133 cmem->ranges[cmem->nr_ranges].start = res->start; 134 cmem->ranges[cmem->nr_ranges].end = res->end; 135 cmem->nr_ranges++; 136 137 return 0; 138 } 139 140 static int prepare_elf_headers(void **addr, unsigned long *sz) 141 { 142 struct crash_mem *cmem; 143 unsigned int nr_ranges; 144 int ret; 145 146 nr_ranges = 1; /* For exclusion of crashkernel region */ 147 walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback); 148 149 cmem = kmalloc(struct_size(cmem, ranges, nr_ranges), GFP_KERNEL); 150 if (!cmem) 151 return -ENOMEM; 152 153 cmem->max_nr_ranges = nr_ranges; 154 cmem->nr_ranges = 0; 155 ret = walk_system_ram_res(0, -1, cmem, prepare_elf64_ram_headers_callback); 156 if (ret) 157 goto out; 158 159 /* Exclude crashkernel region */ 160 ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end); 161 if (!ret) 162 ret = crash_prepare_elf64_headers(cmem, true, addr, sz); 163 164 out: 165 kfree(cmem); 166 return ret; 167 } 168 169 static char *setup_kdump_cmdline(struct kimage *image, char *cmdline, 170 unsigned long cmdline_len) 171 { 172 int elfcorehdr_strlen; 173 char *cmdline_ptr; 174 175 cmdline_ptr = kzalloc(COMMAND_LINE_SIZE, GFP_KERNEL); 176 if (!cmdline_ptr) 177 return NULL; 178 179 elfcorehdr_strlen = sprintf(cmdline_ptr, "elfcorehdr=0x%lx ", 180 image->elf_load_addr); 181 182 if (elfcorehdr_strlen + cmdline_len > COMMAND_LINE_SIZE) { 183 pr_err("Appending elfcorehdr=<addr> exceeds cmdline size\n"); 184 kfree(cmdline_ptr); 185 return NULL; 186 } 187 188 memcpy(cmdline_ptr + elfcorehdr_strlen, cmdline, cmdline_len); 189 /* Ensure it's nul terminated */ 190 cmdline_ptr[COMMAND_LINE_SIZE - 1] = '\0'; 191 return cmdline_ptr; 192 } 193 #endif 194 195 static void *elf_kexec_load(struct kimage *image, char *kernel_buf, 196 unsigned long kernel_len, char *initrd, 197 unsigned long initrd_len, char *cmdline, 198 unsigned long cmdline_len) 199 { 200 int ret; 201 void *fdt; 202 unsigned long old_kernel_pbase = ULONG_MAX; 203 unsigned long new_kernel_pbase = 0UL; 204 unsigned long initrd_pbase = 0UL; 205 unsigned long kernel_start; 206 struct elfhdr ehdr; 207 struct kexec_buf kbuf; 208 struct kexec_elf_info elf_info; 209 char *modified_cmdline = NULL; 210 211 ret = kexec_build_elf_info(kernel_buf, kernel_len, &ehdr, &elf_info); 212 if (ret) 213 return ERR_PTR(ret); 214 215 ret = elf_find_pbase(image, kernel_len, &ehdr, &elf_info, 216 &old_kernel_pbase, &new_kernel_pbase); 217 if (ret) 218 goto out; 219 kernel_start = image->start; 220 221 /* Add the kernel binary to the image */ 222 ret = riscv_kexec_elf_load(image, &ehdr, &elf_info, 223 old_kernel_pbase, new_kernel_pbase); 224 if (ret) 225 goto out; 226 227 kbuf.image = image; 228 kbuf.buf_min = new_kernel_pbase + kernel_len; 229 kbuf.buf_max = ULONG_MAX; 230 231 #ifdef CONFIG_CRASH_DUMP 232 /* Add elfcorehdr */ 233 if (image->type == KEXEC_TYPE_CRASH) { 234 void *headers; 235 unsigned long headers_sz; 236 ret = prepare_elf_headers(&headers, &headers_sz); 237 if (ret) { 238 pr_err("Preparing elf core header failed\n"); 239 goto out; 240 } 241 242 kbuf.buffer = headers; 243 kbuf.bufsz = headers_sz; 244 kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; 245 kbuf.memsz = headers_sz; 246 kbuf.buf_align = ELF_CORE_HEADER_ALIGN; 247 kbuf.top_down = true; 248 249 ret = kexec_add_buffer(&kbuf); 250 if (ret) { 251 vfree(headers); 252 goto out; 253 } 254 image->elf_headers = headers; 255 image->elf_load_addr = kbuf.mem; 256 image->elf_headers_sz = headers_sz; 257 258 kexec_dprintk("Loaded elf core header at 0x%lx bufsz=0x%lx memsz=0x%lx\n", 259 image->elf_load_addr, kbuf.bufsz, kbuf.memsz); 260 261 /* Setup cmdline for kdump kernel case */ 262 modified_cmdline = setup_kdump_cmdline(image, cmdline, 263 cmdline_len); 264 if (!modified_cmdline) { 265 pr_err("Setting up cmdline for kdump kernel failed\n"); 266 ret = -EINVAL; 267 goto out; 268 } 269 cmdline = modified_cmdline; 270 } 271 #endif 272 273 #ifdef CONFIG_ARCH_SUPPORTS_KEXEC_PURGATORY 274 /* Add purgatory to the image */ 275 kbuf.top_down = true; 276 kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; 277 ret = kexec_load_purgatory(image, &kbuf); 278 if (ret) { 279 pr_err("Error loading purgatory ret=%d\n", ret); 280 goto out; 281 } 282 kexec_dprintk("Loaded purgatory at 0x%lx\n", kbuf.mem); 283 284 ret = kexec_purgatory_get_set_symbol(image, "riscv_kernel_entry", 285 &kernel_start, 286 sizeof(kernel_start), 0); 287 if (ret) 288 pr_err("Error update purgatory ret=%d\n", ret); 289 #endif /* CONFIG_ARCH_SUPPORTS_KEXEC_PURGATORY */ 290 291 /* Add the initrd to the image */ 292 if (initrd != NULL) { 293 kbuf.buffer = initrd; 294 kbuf.bufsz = kbuf.memsz = initrd_len; 295 kbuf.buf_align = PAGE_SIZE; 296 kbuf.top_down = true; 297 kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; 298 ret = kexec_add_buffer(&kbuf); 299 if (ret) 300 goto out; 301 initrd_pbase = kbuf.mem; 302 kexec_dprintk("Loaded initrd at 0x%lx\n", initrd_pbase); 303 } 304 305 /* Add the DTB to the image */ 306 fdt = of_kexec_alloc_and_setup_fdt(image, initrd_pbase, 307 initrd_len, cmdline, 0); 308 if (!fdt) { 309 pr_err("Error setting up the new device tree.\n"); 310 ret = -EINVAL; 311 goto out; 312 } 313 314 fdt_pack(fdt); 315 kbuf.buffer = fdt; 316 kbuf.bufsz = kbuf.memsz = fdt_totalsize(fdt); 317 kbuf.buf_align = PAGE_SIZE; 318 kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; 319 kbuf.top_down = true; 320 ret = kexec_add_buffer(&kbuf); 321 if (ret) { 322 pr_err("Error add DTB kbuf ret=%d\n", ret); 323 goto out_free_fdt; 324 } 325 /* Cache the fdt buffer address for memory cleanup */ 326 image->arch.fdt = fdt; 327 kexec_dprintk("Loaded device tree at 0x%lx\n", kbuf.mem); 328 goto out; 329 330 out_free_fdt: 331 kvfree(fdt); 332 out: 333 kfree(modified_cmdline); 334 kexec_free_elf_info(&elf_info); 335 return ret ? ERR_PTR(ret) : NULL; 336 } 337 338 #define RV_X(x, s, n) (((x) >> (s)) & ((1 << (n)) - 1)) 339 #define RISCV_IMM_BITS 12 340 #define RISCV_IMM_REACH (1LL << RISCV_IMM_BITS) 341 #define RISCV_CONST_HIGH_PART(x) \ 342 (((x) + (RISCV_IMM_REACH >> 1)) & ~(RISCV_IMM_REACH - 1)) 343 #define RISCV_CONST_LOW_PART(x) ((x) - RISCV_CONST_HIGH_PART(x)) 344 345 #define ENCODE_ITYPE_IMM(x) \ 346 (RV_X(x, 0, 12) << 20) 347 #define ENCODE_BTYPE_IMM(x) \ 348 ((RV_X(x, 1, 4) << 8) | (RV_X(x, 5, 6) << 25) | \ 349 (RV_X(x, 11, 1) << 7) | (RV_X(x, 12, 1) << 31)) 350 #define ENCODE_UTYPE_IMM(x) \ 351 (RV_X(x, 12, 20) << 12) 352 #define ENCODE_JTYPE_IMM(x) \ 353 ((RV_X(x, 1, 10) << 21) | (RV_X(x, 11, 1) << 20) | \ 354 (RV_X(x, 12, 8) << 12) | (RV_X(x, 20, 1) << 31)) 355 #define ENCODE_CBTYPE_IMM(x) \ 356 ((RV_X(x, 1, 2) << 3) | (RV_X(x, 3, 2) << 10) | (RV_X(x, 5, 1) << 2) | \ 357 (RV_X(x, 6, 2) << 5) | (RV_X(x, 8, 1) << 12)) 358 #define ENCODE_CJTYPE_IMM(x) \ 359 ((RV_X(x, 1, 3) << 3) | (RV_X(x, 4, 1) << 11) | (RV_X(x, 5, 1) << 2) | \ 360 (RV_X(x, 6, 1) << 7) | (RV_X(x, 7, 1) << 6) | (RV_X(x, 8, 2) << 9) | \ 361 (RV_X(x, 10, 1) << 8) | (RV_X(x, 11, 1) << 12)) 362 #define ENCODE_UJTYPE_IMM(x) \ 363 (ENCODE_UTYPE_IMM(RISCV_CONST_HIGH_PART(x)) | \ 364 (ENCODE_ITYPE_IMM(RISCV_CONST_LOW_PART(x)) << 32)) 365 #define ENCODE_UITYPE_IMM(x) \ 366 (ENCODE_UTYPE_IMM(x) | (ENCODE_ITYPE_IMM(x) << 32)) 367 368 #define CLEAN_IMM(type, x) \ 369 ((~ENCODE_##type##_IMM((uint64_t)(-1))) & (x)) 370 371 int arch_kexec_apply_relocations_add(struct purgatory_info *pi, 372 Elf_Shdr *section, 373 const Elf_Shdr *relsec, 374 const Elf_Shdr *symtab) 375 { 376 const char *strtab, *name, *shstrtab; 377 const Elf_Shdr *sechdrs; 378 Elf64_Rela *relas; 379 int i, r_type; 380 381 /* String & section header string table */ 382 sechdrs = (void *)pi->ehdr + pi->ehdr->e_shoff; 383 strtab = (char *)pi->ehdr + sechdrs[symtab->sh_link].sh_offset; 384 shstrtab = (char *)pi->ehdr + sechdrs[pi->ehdr->e_shstrndx].sh_offset; 385 386 relas = (void *)pi->ehdr + relsec->sh_offset; 387 388 for (i = 0; i < relsec->sh_size / sizeof(*relas); i++) { 389 const Elf_Sym *sym; /* symbol to relocate */ 390 unsigned long addr; /* final location after relocation */ 391 unsigned long val; /* relocated symbol value */ 392 unsigned long sec_base; /* relocated symbol value */ 393 void *loc; /* tmp location to modify */ 394 395 sym = (void *)pi->ehdr + symtab->sh_offset; 396 sym += ELF64_R_SYM(relas[i].r_info); 397 398 if (sym->st_name) 399 name = strtab + sym->st_name; 400 else 401 name = shstrtab + sechdrs[sym->st_shndx].sh_name; 402 403 loc = pi->purgatory_buf; 404 loc += section->sh_offset; 405 loc += relas[i].r_offset; 406 407 if (sym->st_shndx == SHN_ABS) 408 sec_base = 0; 409 else if (sym->st_shndx >= pi->ehdr->e_shnum) { 410 pr_err("Invalid section %d for symbol %s\n", 411 sym->st_shndx, name); 412 return -ENOEXEC; 413 } else 414 sec_base = pi->sechdrs[sym->st_shndx].sh_addr; 415 416 val = sym->st_value; 417 val += sec_base; 418 val += relas[i].r_addend; 419 420 addr = section->sh_addr + relas[i].r_offset; 421 422 r_type = ELF64_R_TYPE(relas[i].r_info); 423 424 switch (r_type) { 425 case R_RISCV_BRANCH: 426 *(u32 *)loc = CLEAN_IMM(BTYPE, *(u32 *)loc) | 427 ENCODE_BTYPE_IMM(val - addr); 428 break; 429 case R_RISCV_JAL: 430 *(u32 *)loc = CLEAN_IMM(JTYPE, *(u32 *)loc) | 431 ENCODE_JTYPE_IMM(val - addr); 432 break; 433 /* 434 * With no R_RISCV_PCREL_LO12_S, R_RISCV_PCREL_LO12_I 435 * sym is expected to be next to R_RISCV_PCREL_HI20 436 * in purgatory relsec. Handle it like R_RISCV_CALL 437 * sym, instead of searching the whole relsec. 438 */ 439 case R_RISCV_PCREL_HI20: 440 case R_RISCV_CALL_PLT: 441 case R_RISCV_CALL: 442 *(u64 *)loc = CLEAN_IMM(UITYPE, *(u64 *)loc) | 443 ENCODE_UJTYPE_IMM(val - addr); 444 break; 445 case R_RISCV_RVC_BRANCH: 446 *(u32 *)loc = CLEAN_IMM(CBTYPE, *(u32 *)loc) | 447 ENCODE_CBTYPE_IMM(val - addr); 448 break; 449 case R_RISCV_RVC_JUMP: 450 *(u32 *)loc = CLEAN_IMM(CJTYPE, *(u32 *)loc) | 451 ENCODE_CJTYPE_IMM(val - addr); 452 break; 453 case R_RISCV_ADD32: 454 *(u32 *)loc += val; 455 break; 456 case R_RISCV_SUB32: 457 *(u32 *)loc -= val; 458 break; 459 /* It has been applied by R_RISCV_PCREL_HI20 sym */ 460 case R_RISCV_PCREL_LO12_I: 461 case R_RISCV_ALIGN: 462 case R_RISCV_RELAX: 463 break; 464 default: 465 pr_err("Unknown rela relocation: %d\n", r_type); 466 return -ENOEXEC; 467 } 468 } 469 return 0; 470 } 471 472 const struct kexec_file_ops elf_kexec_ops = { 473 .probe = kexec_elf_probe, 474 .load = elf_kexec_load, 475 }; 476