1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Load ELF vmlinux file for the kexec_file_load syscall. 4 * 5 * Copyright (C) 2021 Huawei Technologies Co, Ltd. 6 * 7 * Author: Liao Chang (liaochang1@huawei.com) 8 * 9 * Based on kexec-tools' kexec-elf-riscv.c, heavily modified 10 * for kernel. 11 */ 12 13 #define pr_fmt(fmt) "kexec_image: " fmt 14 15 #include <linux/elf.h> 16 #include <linux/kexec.h> 17 #include <linux/slab.h> 18 #include <linux/of.h> 19 #include <linux/libfdt.h> 20 #include <linux/types.h> 21 #include <linux/memblock.h> 22 #include <linux/vmalloc.h> 23 #include <asm/setup.h> 24 25 int arch_kimage_file_post_load_cleanup(struct kimage *image) 26 { 27 kvfree(image->arch.fdt); 28 image->arch.fdt = NULL; 29 30 vfree(image->elf_headers); 31 image->elf_headers = NULL; 32 image->elf_headers_sz = 0; 33 34 return kexec_image_post_load_cleanup_default(image); 35 } 36 37 static int riscv_kexec_elf_load(struct kimage *image, struct elfhdr *ehdr, 38 struct kexec_elf_info *elf_info, unsigned long old_pbase, 39 unsigned long new_pbase) 40 { 41 int i; 42 int ret = 0; 43 size_t size; 44 struct kexec_buf kbuf; 45 const struct elf_phdr *phdr; 46 47 kbuf.image = image; 48 49 for (i = 0; i < ehdr->e_phnum; i++) { 50 phdr = &elf_info->proghdrs[i]; 51 if (phdr->p_type != PT_LOAD) 52 continue; 53 54 size = phdr->p_filesz; 55 if (size > phdr->p_memsz) 56 size = phdr->p_memsz; 57 58 kbuf.buffer = (void *) elf_info->buffer + phdr->p_offset; 59 kbuf.bufsz = size; 60 kbuf.buf_align = phdr->p_align; 61 kbuf.mem = phdr->p_paddr - old_pbase + new_pbase; 62 kbuf.memsz = phdr->p_memsz; 63 kbuf.top_down = false; 64 ret = kexec_add_buffer(&kbuf); 65 if (ret) 66 break; 67 } 68 69 return ret; 70 } 71 72 /* 73 * Go through the available phsyical memory regions and find one that hold 74 * an image of the specified size. 75 */ 76 static int elf_find_pbase(struct kimage *image, unsigned long kernel_len, 77 struct elfhdr *ehdr, struct kexec_elf_info *elf_info, 78 unsigned long *old_pbase, unsigned long *new_pbase) 79 { 80 int i; 81 int ret; 82 struct kexec_buf kbuf; 83 const struct elf_phdr *phdr; 84 unsigned long lowest_paddr = ULONG_MAX; 85 unsigned long lowest_vaddr = ULONG_MAX; 86 87 for (i = 0; i < ehdr->e_phnum; i++) { 88 phdr = &elf_info->proghdrs[i]; 89 if (phdr->p_type != PT_LOAD) 90 continue; 91 92 if (lowest_paddr > phdr->p_paddr) 93 lowest_paddr = phdr->p_paddr; 94 95 if (lowest_vaddr > phdr->p_vaddr) 96 lowest_vaddr = phdr->p_vaddr; 97 } 98 99 kbuf.image = image; 100 kbuf.buf_min = lowest_paddr; 101 kbuf.buf_max = ULONG_MAX; 102 103 /* 104 * Current riscv boot protocol requires 2MB alignment for 105 * RV64 and 4MB alignment for RV32 106 * 107 */ 108 kbuf.buf_align = PMD_SIZE; 109 kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; 110 kbuf.memsz = ALIGN(kernel_len, PAGE_SIZE); 111 kbuf.top_down = false; 112 ret = arch_kexec_locate_mem_hole(&kbuf); 113 if (!ret) { 114 *old_pbase = lowest_paddr; 115 *new_pbase = kbuf.mem; 116 image->start = ehdr->e_entry - lowest_vaddr + kbuf.mem; 117 } 118 return ret; 119 } 120 121 #ifdef CONFIG_CRASH_DUMP 122 static int get_nr_ram_ranges_callback(struct resource *res, void *arg) 123 { 124 unsigned int *nr_ranges = arg; 125 126 (*nr_ranges)++; 127 return 0; 128 } 129 130 static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg) 131 { 132 struct crash_mem *cmem = arg; 133 134 cmem->ranges[cmem->nr_ranges].start = res->start; 135 cmem->ranges[cmem->nr_ranges].end = res->end; 136 cmem->nr_ranges++; 137 138 return 0; 139 } 140 141 static int prepare_elf_headers(void **addr, unsigned long *sz) 142 { 143 struct crash_mem *cmem; 144 unsigned int nr_ranges; 145 int ret; 146 147 nr_ranges = 1; /* For exclusion of crashkernel region */ 148 walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback); 149 150 cmem = kmalloc(struct_size(cmem, ranges, nr_ranges), GFP_KERNEL); 151 if (!cmem) 152 return -ENOMEM; 153 154 cmem->max_nr_ranges = nr_ranges; 155 cmem->nr_ranges = 0; 156 ret = walk_system_ram_res(0, -1, cmem, prepare_elf64_ram_headers_callback); 157 if (ret) 158 goto out; 159 160 /* Exclude crashkernel region */ 161 ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end); 162 if (!ret) 163 ret = crash_prepare_elf64_headers(cmem, true, addr, sz); 164 165 out: 166 kfree(cmem); 167 return ret; 168 } 169 170 static char *setup_kdump_cmdline(struct kimage *image, char *cmdline, 171 unsigned long cmdline_len) 172 { 173 int elfcorehdr_strlen; 174 char *cmdline_ptr; 175 176 cmdline_ptr = kzalloc(COMMAND_LINE_SIZE, GFP_KERNEL); 177 if (!cmdline_ptr) 178 return NULL; 179 180 elfcorehdr_strlen = sprintf(cmdline_ptr, "elfcorehdr=0x%lx ", 181 image->elf_load_addr); 182 183 if (elfcorehdr_strlen + cmdline_len > COMMAND_LINE_SIZE) { 184 pr_err("Appending elfcorehdr=<addr> exceeds cmdline size\n"); 185 kfree(cmdline_ptr); 186 return NULL; 187 } 188 189 memcpy(cmdline_ptr + elfcorehdr_strlen, cmdline, cmdline_len); 190 /* Ensure it's nul terminated */ 191 cmdline_ptr[COMMAND_LINE_SIZE - 1] = '\0'; 192 return cmdline_ptr; 193 } 194 #endif 195 196 static void *elf_kexec_load(struct kimage *image, char *kernel_buf, 197 unsigned long kernel_len, char *initrd, 198 unsigned long initrd_len, char *cmdline, 199 unsigned long cmdline_len) 200 { 201 int ret; 202 void *fdt; 203 unsigned long old_kernel_pbase = ULONG_MAX; 204 unsigned long new_kernel_pbase = 0UL; 205 unsigned long initrd_pbase = 0UL; 206 unsigned long kernel_start; 207 struct elfhdr ehdr; 208 struct kexec_buf kbuf; 209 struct kexec_elf_info elf_info; 210 char *modified_cmdline = NULL; 211 212 ret = kexec_build_elf_info(kernel_buf, kernel_len, &ehdr, &elf_info); 213 if (ret) 214 return ERR_PTR(ret); 215 216 ret = elf_find_pbase(image, kernel_len, &ehdr, &elf_info, 217 &old_kernel_pbase, &new_kernel_pbase); 218 if (ret) 219 goto out; 220 kernel_start = image->start; 221 222 /* Add the kernel binary to the image */ 223 ret = riscv_kexec_elf_load(image, &ehdr, &elf_info, 224 old_kernel_pbase, new_kernel_pbase); 225 if (ret) 226 goto out; 227 228 kbuf.image = image; 229 kbuf.buf_min = new_kernel_pbase + kernel_len; 230 kbuf.buf_max = ULONG_MAX; 231 232 #ifdef CONFIG_CRASH_DUMP 233 /* Add elfcorehdr */ 234 if (image->type == KEXEC_TYPE_CRASH) { 235 void *headers; 236 unsigned long headers_sz; 237 ret = prepare_elf_headers(&headers, &headers_sz); 238 if (ret) { 239 pr_err("Preparing elf core header failed\n"); 240 goto out; 241 } 242 243 kbuf.buffer = headers; 244 kbuf.bufsz = headers_sz; 245 kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; 246 kbuf.memsz = headers_sz; 247 kbuf.buf_align = ELF_CORE_HEADER_ALIGN; 248 kbuf.top_down = true; 249 250 ret = kexec_add_buffer(&kbuf); 251 if (ret) { 252 vfree(headers); 253 goto out; 254 } 255 image->elf_headers = headers; 256 image->elf_load_addr = kbuf.mem; 257 image->elf_headers_sz = headers_sz; 258 259 kexec_dprintk("Loaded elf core header at 0x%lx bufsz=0x%lx memsz=0x%lx\n", 260 image->elf_load_addr, kbuf.bufsz, kbuf.memsz); 261 262 /* Setup cmdline for kdump kernel case */ 263 modified_cmdline = setup_kdump_cmdline(image, cmdline, 264 cmdline_len); 265 if (!modified_cmdline) { 266 pr_err("Setting up cmdline for kdump kernel failed\n"); 267 ret = -EINVAL; 268 goto out; 269 } 270 cmdline = modified_cmdline; 271 } 272 #endif 273 274 #ifdef CONFIG_ARCH_SUPPORTS_KEXEC_PURGATORY 275 /* Add purgatory to the image */ 276 kbuf.top_down = true; 277 kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; 278 ret = kexec_load_purgatory(image, &kbuf); 279 if (ret) { 280 pr_err("Error loading purgatory ret=%d\n", ret); 281 goto out; 282 } 283 kexec_dprintk("Loaded purgatory at 0x%lx\n", kbuf.mem); 284 285 ret = kexec_purgatory_get_set_symbol(image, "riscv_kernel_entry", 286 &kernel_start, 287 sizeof(kernel_start), 0); 288 if (ret) 289 pr_err("Error update purgatory ret=%d\n", ret); 290 #endif /* CONFIG_ARCH_SUPPORTS_KEXEC_PURGATORY */ 291 292 /* Add the initrd to the image */ 293 if (initrd != NULL) { 294 kbuf.buffer = initrd; 295 kbuf.bufsz = kbuf.memsz = initrd_len; 296 kbuf.buf_align = PAGE_SIZE; 297 kbuf.top_down = true; 298 kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; 299 ret = kexec_add_buffer(&kbuf); 300 if (ret) 301 goto out; 302 initrd_pbase = kbuf.mem; 303 kexec_dprintk("Loaded initrd at 0x%lx\n", initrd_pbase); 304 } 305 306 /* Add the DTB to the image */ 307 fdt = of_kexec_alloc_and_setup_fdt(image, initrd_pbase, 308 initrd_len, cmdline, 0); 309 if (!fdt) { 310 pr_err("Error setting up the new device tree.\n"); 311 ret = -EINVAL; 312 goto out; 313 } 314 315 fdt_pack(fdt); 316 kbuf.buffer = fdt; 317 kbuf.bufsz = kbuf.memsz = fdt_totalsize(fdt); 318 kbuf.buf_align = PAGE_SIZE; 319 kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; 320 kbuf.top_down = true; 321 ret = kexec_add_buffer(&kbuf); 322 if (ret) { 323 pr_err("Error add DTB kbuf ret=%d\n", ret); 324 goto out_free_fdt; 325 } 326 /* Cache the fdt buffer address for memory cleanup */ 327 image->arch.fdt = fdt; 328 kexec_dprintk("Loaded device tree at 0x%lx\n", kbuf.mem); 329 goto out; 330 331 out_free_fdt: 332 kvfree(fdt); 333 out: 334 kfree(modified_cmdline); 335 kexec_free_elf_info(&elf_info); 336 return ret ? ERR_PTR(ret) : NULL; 337 } 338 339 #define RV_X(x, s, n) (((x) >> (s)) & ((1 << (n)) - 1)) 340 #define RISCV_IMM_BITS 12 341 #define RISCV_IMM_REACH (1LL << RISCV_IMM_BITS) 342 #define RISCV_CONST_HIGH_PART(x) \ 343 (((x) + (RISCV_IMM_REACH >> 1)) & ~(RISCV_IMM_REACH - 1)) 344 #define RISCV_CONST_LOW_PART(x) ((x) - RISCV_CONST_HIGH_PART(x)) 345 346 #define ENCODE_ITYPE_IMM(x) \ 347 (RV_X(x, 0, 12) << 20) 348 #define ENCODE_BTYPE_IMM(x) \ 349 ((RV_X(x, 1, 4) << 8) | (RV_X(x, 5, 6) << 25) | \ 350 (RV_X(x, 11, 1) << 7) | (RV_X(x, 12, 1) << 31)) 351 #define ENCODE_UTYPE_IMM(x) \ 352 (RV_X(x, 12, 20) << 12) 353 #define ENCODE_JTYPE_IMM(x) \ 354 ((RV_X(x, 1, 10) << 21) | (RV_X(x, 11, 1) << 20) | \ 355 (RV_X(x, 12, 8) << 12) | (RV_X(x, 20, 1) << 31)) 356 #define ENCODE_CBTYPE_IMM(x) \ 357 ((RV_X(x, 1, 2) << 3) | (RV_X(x, 3, 2) << 10) | (RV_X(x, 5, 1) << 2) | \ 358 (RV_X(x, 6, 2) << 5) | (RV_X(x, 8, 1) << 12)) 359 #define ENCODE_CJTYPE_IMM(x) \ 360 ((RV_X(x, 1, 3) << 3) | (RV_X(x, 4, 1) << 11) | (RV_X(x, 5, 1) << 2) | \ 361 (RV_X(x, 6, 1) << 7) | (RV_X(x, 7, 1) << 6) | (RV_X(x, 8, 2) << 9) | \ 362 (RV_X(x, 10, 1) << 8) | (RV_X(x, 11, 1) << 12)) 363 #define ENCODE_UJTYPE_IMM(x) \ 364 (ENCODE_UTYPE_IMM(RISCV_CONST_HIGH_PART(x)) | \ 365 (ENCODE_ITYPE_IMM(RISCV_CONST_LOW_PART(x)) << 32)) 366 #define ENCODE_UITYPE_IMM(x) \ 367 (ENCODE_UTYPE_IMM(x) | (ENCODE_ITYPE_IMM(x) << 32)) 368 369 #define CLEAN_IMM(type, x) \ 370 ((~ENCODE_##type##_IMM((uint64_t)(-1))) & (x)) 371 372 int arch_kexec_apply_relocations_add(struct purgatory_info *pi, 373 Elf_Shdr *section, 374 const Elf_Shdr *relsec, 375 const Elf_Shdr *symtab) 376 { 377 const char *strtab, *name, *shstrtab; 378 const Elf_Shdr *sechdrs; 379 Elf64_Rela *relas; 380 int i, r_type; 381 382 /* String & section header string table */ 383 sechdrs = (void *)pi->ehdr + pi->ehdr->e_shoff; 384 strtab = (char *)pi->ehdr + sechdrs[symtab->sh_link].sh_offset; 385 shstrtab = (char *)pi->ehdr + sechdrs[pi->ehdr->e_shstrndx].sh_offset; 386 387 relas = (void *)pi->ehdr + relsec->sh_offset; 388 389 for (i = 0; i < relsec->sh_size / sizeof(*relas); i++) { 390 const Elf_Sym *sym; /* symbol to relocate */ 391 unsigned long addr; /* final location after relocation */ 392 unsigned long val; /* relocated symbol value */ 393 unsigned long sec_base; /* relocated symbol value */ 394 void *loc; /* tmp location to modify */ 395 396 sym = (void *)pi->ehdr + symtab->sh_offset; 397 sym += ELF64_R_SYM(relas[i].r_info); 398 399 if (sym->st_name) 400 name = strtab + sym->st_name; 401 else 402 name = shstrtab + sechdrs[sym->st_shndx].sh_name; 403 404 loc = pi->purgatory_buf; 405 loc += section->sh_offset; 406 loc += relas[i].r_offset; 407 408 if (sym->st_shndx == SHN_ABS) 409 sec_base = 0; 410 else if (sym->st_shndx >= pi->ehdr->e_shnum) { 411 pr_err("Invalid section %d for symbol %s\n", 412 sym->st_shndx, name); 413 return -ENOEXEC; 414 } else 415 sec_base = pi->sechdrs[sym->st_shndx].sh_addr; 416 417 val = sym->st_value; 418 val += sec_base; 419 val += relas[i].r_addend; 420 421 addr = section->sh_addr + relas[i].r_offset; 422 423 r_type = ELF64_R_TYPE(relas[i].r_info); 424 425 switch (r_type) { 426 case R_RISCV_BRANCH: 427 *(u32 *)loc = CLEAN_IMM(BTYPE, *(u32 *)loc) | 428 ENCODE_BTYPE_IMM(val - addr); 429 break; 430 case R_RISCV_JAL: 431 *(u32 *)loc = CLEAN_IMM(JTYPE, *(u32 *)loc) | 432 ENCODE_JTYPE_IMM(val - addr); 433 break; 434 /* 435 * With no R_RISCV_PCREL_LO12_S, R_RISCV_PCREL_LO12_I 436 * sym is expected to be next to R_RISCV_PCREL_HI20 437 * in purgatory relsec. Handle it like R_RISCV_CALL 438 * sym, instead of searching the whole relsec. 439 */ 440 case R_RISCV_PCREL_HI20: 441 case R_RISCV_CALL_PLT: 442 case R_RISCV_CALL: 443 *(u64 *)loc = CLEAN_IMM(UITYPE, *(u64 *)loc) | 444 ENCODE_UJTYPE_IMM(val - addr); 445 break; 446 case R_RISCV_RVC_BRANCH: 447 *(u32 *)loc = CLEAN_IMM(CBTYPE, *(u32 *)loc) | 448 ENCODE_CBTYPE_IMM(val - addr); 449 break; 450 case R_RISCV_RVC_JUMP: 451 *(u32 *)loc = CLEAN_IMM(CJTYPE, *(u32 *)loc) | 452 ENCODE_CJTYPE_IMM(val - addr); 453 break; 454 case R_RISCV_ADD32: 455 *(u32 *)loc += val; 456 break; 457 case R_RISCV_SUB32: 458 *(u32 *)loc -= val; 459 break; 460 /* It has been applied by R_RISCV_PCREL_HI20 sym */ 461 case R_RISCV_PCREL_LO12_I: 462 case R_RISCV_ALIGN: 463 case R_RISCV_RELAX: 464 break; 465 default: 466 pr_err("Unknown rela relocation: %d\n", r_type); 467 return -ENOEXEC; 468 } 469 } 470 return 0; 471 } 472 473 const struct kexec_file_ops elf_kexec_ops = { 474 .probe = kexec_elf_probe, 475 .load = elf_kexec_load, 476 }; 477