xref: /linux/arch/riscv/kernel/elf_kexec.c (revision 24bce201d79807b668bf9d9e0aca801c5c0d5f78)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Load ELF vmlinux file for the kexec_file_load syscall.
4  *
5  * Copyright (C) 2021 Huawei Technologies Co, Ltd.
6  *
7  * Author: Liao Chang (liaochang1@huawei.com)
8  *
9  * Based on kexec-tools' kexec-elf-riscv.c, heavily modified
10  * for kernel.
11  */
12 
13 #define pr_fmt(fmt)	"kexec_image: " fmt
14 
15 #include <linux/elf.h>
16 #include <linux/kexec.h>
17 #include <linux/slab.h>
18 #include <linux/of.h>
19 #include <linux/libfdt.h>
20 #include <linux/types.h>
21 #include <linux/memblock.h>
22 #include <asm/setup.h>
23 
24 static int riscv_kexec_elf_load(struct kimage *image, struct elfhdr *ehdr,
25 				struct kexec_elf_info *elf_info, unsigned long old_pbase,
26 				unsigned long new_pbase)
27 {
28 	int i;
29 	int ret = 0;
30 	size_t size;
31 	struct kexec_buf kbuf;
32 	const struct elf_phdr *phdr;
33 
34 	kbuf.image = image;
35 
36 	for (i = 0; i < ehdr->e_phnum; i++) {
37 		phdr = &elf_info->proghdrs[i];
38 		if (phdr->p_type != PT_LOAD)
39 			continue;
40 
41 		size = phdr->p_filesz;
42 		if (size > phdr->p_memsz)
43 			size = phdr->p_memsz;
44 
45 		kbuf.buffer = (void *) elf_info->buffer + phdr->p_offset;
46 		kbuf.bufsz = size;
47 		kbuf.buf_align = phdr->p_align;
48 		kbuf.mem = phdr->p_paddr - old_pbase + new_pbase;
49 		kbuf.memsz = phdr->p_memsz;
50 		kbuf.top_down = false;
51 		ret = kexec_add_buffer(&kbuf);
52 		if (ret)
53 			break;
54 	}
55 
56 	return ret;
57 }
58 
59 /*
60  * Go through the available phsyical memory regions and find one that hold
61  * an image of the specified size.
62  */
63 static int elf_find_pbase(struct kimage *image, unsigned long kernel_len,
64 			  struct elfhdr *ehdr, struct kexec_elf_info *elf_info,
65 			  unsigned long *old_pbase, unsigned long *new_pbase)
66 {
67 	int i;
68 	int ret;
69 	struct kexec_buf kbuf;
70 	const struct elf_phdr *phdr;
71 	unsigned long lowest_paddr = ULONG_MAX;
72 	unsigned long lowest_vaddr = ULONG_MAX;
73 
74 	for (i = 0; i < ehdr->e_phnum; i++) {
75 		phdr = &elf_info->proghdrs[i];
76 		if (phdr->p_type != PT_LOAD)
77 			continue;
78 
79 		if (lowest_paddr > phdr->p_paddr)
80 			lowest_paddr = phdr->p_paddr;
81 
82 		if (lowest_vaddr > phdr->p_vaddr)
83 			lowest_vaddr = phdr->p_vaddr;
84 	}
85 
86 	kbuf.image = image;
87 	kbuf.buf_min = lowest_paddr;
88 	kbuf.buf_max = ULONG_MAX;
89 	kbuf.buf_align = PAGE_SIZE;
90 	kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
91 	kbuf.memsz = ALIGN(kernel_len, PAGE_SIZE);
92 	kbuf.top_down = false;
93 	ret = arch_kexec_locate_mem_hole(&kbuf);
94 	if (!ret) {
95 		*old_pbase = lowest_paddr;
96 		*new_pbase = kbuf.mem;
97 		image->start = ehdr->e_entry - lowest_vaddr + kbuf.mem;
98 	}
99 	return ret;
100 }
101 
/*
 * Resource-walk callback that counts invocations: increments the
 * unsigned int that @arg points at and always returns 0. @res is unused.
 */
static int get_nr_ram_ranges_callback(struct resource *res, void *arg)
{
	unsigned int *counter = arg;

	*counter += 1;
	return 0;
}
109 
110 static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg)
111 {
112 	struct crash_mem *cmem = arg;
113 
114 	cmem->ranges[cmem->nr_ranges].start = res->start;
115 	cmem->ranges[cmem->nr_ranges].end = res->end;
116 	cmem->nr_ranges++;
117 
118 	return 0;
119 }
120 
121 static int prepare_elf_headers(void **addr, unsigned long *sz)
122 {
123 	struct crash_mem *cmem;
124 	unsigned int nr_ranges;
125 	int ret;
126 
127 	nr_ranges = 1; /* For exclusion of crashkernel region */
128 	walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback);
129 
130 	cmem = kmalloc(struct_size(cmem, ranges, nr_ranges), GFP_KERNEL);
131 	if (!cmem)
132 		return -ENOMEM;
133 
134 	cmem->max_nr_ranges = nr_ranges;
135 	cmem->nr_ranges = 0;
136 	ret = walk_system_ram_res(0, -1, cmem, prepare_elf64_ram_headers_callback);
137 	if (ret)
138 		goto out;
139 
140 	/* Exclude crashkernel region */
141 	ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end);
142 	if (!ret)
143 		ret = crash_prepare_elf64_headers(cmem, true, addr, sz);
144 
145 out:
146 	kfree(cmem);
147 	return ret;
148 }
149 
150 static char *setup_kdump_cmdline(struct kimage *image, char *cmdline,
151 				 unsigned long cmdline_len)
152 {
153 	int elfcorehdr_strlen;
154 	char *cmdline_ptr;
155 
156 	cmdline_ptr = kzalloc(COMMAND_LINE_SIZE, GFP_KERNEL);
157 	if (!cmdline_ptr)
158 		return NULL;
159 
160 	elfcorehdr_strlen = sprintf(cmdline_ptr, "elfcorehdr=0x%lx ",
161 		image->elf_load_addr);
162 
163 	if (elfcorehdr_strlen + cmdline_len > COMMAND_LINE_SIZE) {
164 		pr_err("Appending elfcorehdr=<addr> exceeds cmdline size\n");
165 		kfree(cmdline_ptr);
166 		return NULL;
167 	}
168 
169 	memcpy(cmdline_ptr + elfcorehdr_strlen, cmdline, cmdline_len);
170 	/* Ensure it's nul terminated */
171 	cmdline_ptr[COMMAND_LINE_SIZE - 1] = '\0';
172 	return cmdline_ptr;
173 }
174 
175 static void *elf_kexec_load(struct kimage *image, char *kernel_buf,
176 			    unsigned long kernel_len, char *initrd,
177 			    unsigned long initrd_len, char *cmdline,
178 			    unsigned long cmdline_len)
179 {
180 	int ret;
181 	unsigned long old_kernel_pbase = ULONG_MAX;
182 	unsigned long new_kernel_pbase = 0UL;
183 	unsigned long initrd_pbase = 0UL;
184 	unsigned long headers_sz;
185 	unsigned long kernel_start;
186 	void *fdt, *headers;
187 	struct elfhdr ehdr;
188 	struct kexec_buf kbuf;
189 	struct kexec_elf_info elf_info;
190 	char *modified_cmdline = NULL;
191 
192 	ret = kexec_build_elf_info(kernel_buf, kernel_len, &ehdr, &elf_info);
193 	if (ret)
194 		return ERR_PTR(ret);
195 
196 	ret = elf_find_pbase(image, kernel_len, &ehdr, &elf_info,
197 			     &old_kernel_pbase, &new_kernel_pbase);
198 	if (ret)
199 		goto out;
200 	kernel_start = image->start;
201 	pr_notice("The entry point of kernel at 0x%lx\n", image->start);
202 
203 	/* Add the kernel binary to the image */
204 	ret = riscv_kexec_elf_load(image, &ehdr, &elf_info,
205 				   old_kernel_pbase, new_kernel_pbase);
206 	if (ret)
207 		goto out;
208 
209 	kbuf.image = image;
210 	kbuf.buf_min = new_kernel_pbase + kernel_len;
211 	kbuf.buf_max = ULONG_MAX;
212 
213 	/* Add elfcorehdr */
214 	if (image->type == KEXEC_TYPE_CRASH) {
215 		ret = prepare_elf_headers(&headers, &headers_sz);
216 		if (ret) {
217 			pr_err("Preparing elf core header failed\n");
218 			goto out;
219 		}
220 
221 		kbuf.buffer = headers;
222 		kbuf.bufsz = headers_sz;
223 		kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
224 		kbuf.memsz = headers_sz;
225 		kbuf.buf_align = ELF_CORE_HEADER_ALIGN;
226 		kbuf.top_down = true;
227 
228 		ret = kexec_add_buffer(&kbuf);
229 		if (ret) {
230 			vfree(headers);
231 			goto out;
232 		}
233 		image->elf_headers = headers;
234 		image->elf_load_addr = kbuf.mem;
235 		image->elf_headers_sz = headers_sz;
236 
237 		pr_debug("Loaded elf core header at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
238 			 image->elf_load_addr, kbuf.bufsz, kbuf.memsz);
239 
240 		/* Setup cmdline for kdump kernel case */
241 		modified_cmdline = setup_kdump_cmdline(image, cmdline,
242 						       cmdline_len);
243 		if (!modified_cmdline) {
244 			pr_err("Setting up cmdline for kdump kernel failed\n");
245 			ret = -EINVAL;
246 			goto out;
247 		}
248 		cmdline = modified_cmdline;
249 	}
250 
251 #ifdef CONFIG_ARCH_HAS_KEXEC_PURGATORY
252 	/* Add purgatory to the image */
253 	kbuf.top_down = true;
254 	kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
255 	ret = kexec_load_purgatory(image, &kbuf);
256 	if (ret) {
257 		pr_err("Error loading purgatory ret=%d\n", ret);
258 		goto out;
259 	}
260 	ret = kexec_purgatory_get_set_symbol(image, "riscv_kernel_entry",
261 					     &kernel_start,
262 					     sizeof(kernel_start), 0);
263 	if (ret)
264 		pr_err("Error update purgatory ret=%d\n", ret);
265 #endif /* CONFIG_ARCH_HAS_KEXEC_PURGATORY */
266 
267 	/* Add the initrd to the image */
268 	if (initrd != NULL) {
269 		kbuf.buffer = initrd;
270 		kbuf.bufsz = kbuf.memsz = initrd_len;
271 		kbuf.buf_align = PAGE_SIZE;
272 		kbuf.top_down = false;
273 		kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
274 		ret = kexec_add_buffer(&kbuf);
275 		if (ret)
276 			goto out;
277 		initrd_pbase = kbuf.mem;
278 		pr_notice("Loaded initrd at 0x%lx\n", initrd_pbase);
279 	}
280 
281 	/* Add the DTB to the image */
282 	fdt = of_kexec_alloc_and_setup_fdt(image, initrd_pbase,
283 					   initrd_len, cmdline, 0);
284 	if (!fdt) {
285 		pr_err("Error setting up the new device tree.\n");
286 		ret = -EINVAL;
287 		goto out;
288 	}
289 
290 	fdt_pack(fdt);
291 	kbuf.buffer = fdt;
292 	kbuf.bufsz = kbuf.memsz = fdt_totalsize(fdt);
293 	kbuf.buf_align = PAGE_SIZE;
294 	kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
295 	kbuf.top_down = true;
296 	ret = kexec_add_buffer(&kbuf);
297 	if (ret) {
298 		pr_err("Error add DTB kbuf ret=%d\n", ret);
299 		goto out_free_fdt;
300 	}
301 	pr_notice("Loaded device tree at 0x%lx\n", kbuf.mem);
302 	goto out;
303 
304 out_free_fdt:
305 	kvfree(fdt);
306 out:
307 	kfree(modified_cmdline);
308 	kexec_free_elf_info(&elf_info);
309 	return ret ? ERR_PTR(ret) : NULL;
310 }
311 
/*
 * Helpers that scatter an immediate value into the bit positions an
 * instruction format uses for it. Arguments are evaluated several times,
 * so they must be free of side effects.
 */

/* Extract the n-bit field of x that starts at bit s */
#define RV_X(x, s, n)  (((x) >> (s)) & ((1 << (n)) - 1))

/* Split a value into a low 12-bit signed part and the matching high part */
#define RISCV_IMM_BITS 12
#define RISCV_IMM_REACH (1LL << RISCV_IMM_BITS)
#define RISCV_CONST_HIGH_PART(x) \
	(((x) + (RISCV_IMM_REACH >> 1)) & ~(RISCV_IMM_REACH - 1))
#define RISCV_CONST_LOW_PART(x) ((x) - RISCV_CONST_HIGH_PART(x))

/* x[11:0] -> bits 31:20 */
#define ENCODE_ITYPE_IMM(x) \
	(RV_X(x, 0, 12) << 20)
/* x[11] -> bit 7, x[4:1] -> bits 11:8, x[10:5] -> bits 30:25, x[12] -> bit 31 */
#define ENCODE_BTYPE_IMM(x) \
	((RV_X(x, 11, 1) << 7) | (RV_X(x, 1, 4) << 8) | \
	(RV_X(x, 5, 6) << 25) | (RV_X(x, 12, 1) << 31))
/* x[31:12] -> bits 31:12 */
#define ENCODE_UTYPE_IMM(x) \
	(RV_X(x, 12, 20) << 12)
/* x[19:12] -> bits 19:12, x[11] -> bit 20, x[10:1] -> bits 30:21, x[20] -> bit 31 */
#define ENCODE_JTYPE_IMM(x) \
	((RV_X(x, 12, 8) << 12) | (RV_X(x, 11, 1) << 20) | \
	(RV_X(x, 1, 10) << 21) | (RV_X(x, 20, 1) << 31))
/* Compressed branch: immediate spread over bits 2, 4:3, 6:5, 11:10 and 12 */
#define ENCODE_CBTYPE_IMM(x) \
	((RV_X(x, 5, 1) << 2) | (RV_X(x, 1, 2) << 3) | (RV_X(x, 6, 2) << 5) | \
	(RV_X(x, 3, 2) << 10) | (RV_X(x, 8, 1) << 12))
/* Compressed jump: immediate spread over bits 12:2 */
#define ENCODE_CJTYPE_IMM(x) \
	((RV_X(x, 5, 1) << 2) | (RV_X(x, 1, 3) << 3) | (RV_X(x, 7, 1) << 6) | \
	(RV_X(x, 6, 1) << 7) | (RV_X(x, 10, 1) << 8) | (RV_X(x, 8, 2) << 9) | \
	(RV_X(x, 4, 1) << 11) | (RV_X(x, 11, 1) << 12))
/*
 * Two adjacent 32-bit instructions handled as one 64-bit value: U-type
 * immediate in the low word, I-type immediate in the high word. The UJ
 * variant first splits x into its high and low parts.
 */
#define ENCODE_UJTYPE_IMM(x) \
	((ENCODE_ITYPE_IMM(RISCV_CONST_LOW_PART(x)) << 32) | \
	ENCODE_UTYPE_IMM(RISCV_CONST_HIGH_PART(x)))
#define ENCODE_UITYPE_IMM(x) \
	((ENCODE_ITYPE_IMM(x) << 32) | ENCODE_UTYPE_IMM(x))

/* Clear the immediate bits of the given format in instruction word x */
#define CLEAN_IMM(type, x) \
	((x) & (~ENCODE_##type##_IMM((uint64_t)(-1))))
344 
345 int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
346 				     Elf_Shdr *section,
347 				     const Elf_Shdr *relsec,
348 				     const Elf_Shdr *symtab)
349 {
350 	const char *strtab, *name, *shstrtab;
351 	const Elf_Shdr *sechdrs;
352 	Elf_Rela *relas;
353 	int i, r_type;
354 
355 	/* String & section header string table */
356 	sechdrs = (void *)pi->ehdr + pi->ehdr->e_shoff;
357 	strtab = (char *)pi->ehdr + sechdrs[symtab->sh_link].sh_offset;
358 	shstrtab = (char *)pi->ehdr + sechdrs[pi->ehdr->e_shstrndx].sh_offset;
359 
360 	relas = (void *)pi->ehdr + relsec->sh_offset;
361 
362 	for (i = 0; i < relsec->sh_size / sizeof(*relas); i++) {
363 		const Elf_Sym *sym;	/* symbol to relocate */
364 		unsigned long addr;	/* final location after relocation */
365 		unsigned long val;	/* relocated symbol value */
366 		unsigned long sec_base;	/* relocated symbol value */
367 		void *loc;		/* tmp location to modify */
368 
369 		sym = (void *)pi->ehdr + symtab->sh_offset;
370 		sym += ELF64_R_SYM(relas[i].r_info);
371 
372 		if (sym->st_name)
373 			name = strtab + sym->st_name;
374 		else
375 			name = shstrtab + sechdrs[sym->st_shndx].sh_name;
376 
377 		loc = pi->purgatory_buf;
378 		loc += section->sh_offset;
379 		loc += relas[i].r_offset;
380 
381 		if (sym->st_shndx == SHN_ABS)
382 			sec_base = 0;
383 		else if (sym->st_shndx >= pi->ehdr->e_shnum) {
384 			pr_err("Invalid section %d for symbol %s\n",
385 			       sym->st_shndx, name);
386 			return -ENOEXEC;
387 		} else
388 			sec_base = pi->sechdrs[sym->st_shndx].sh_addr;
389 
390 		val = sym->st_value;
391 		val += sec_base;
392 		val += relas[i].r_addend;
393 
394 		addr = section->sh_addr + relas[i].r_offset;
395 
396 		r_type = ELF64_R_TYPE(relas[i].r_info);
397 
398 		switch (r_type) {
399 		case R_RISCV_BRANCH:
400 			*(u32 *)loc = CLEAN_IMM(BTYPE, *(u32 *)loc) |
401 				 ENCODE_BTYPE_IMM(val - addr);
402 			break;
403 		case R_RISCV_JAL:
404 			*(u32 *)loc = CLEAN_IMM(JTYPE, *(u32 *)loc) |
405 				 ENCODE_JTYPE_IMM(val - addr);
406 			break;
407 		/*
408 		 * With no R_RISCV_PCREL_LO12_S, R_RISCV_PCREL_LO12_I
409 		 * sym is expected to be next to R_RISCV_PCREL_HI20
410 		 * in purgatory relsec. Handle it like R_RISCV_CALL
411 		 * sym, instead of searching the whole relsec.
412 		 */
413 		case R_RISCV_PCREL_HI20:
414 		case R_RISCV_CALL:
415 			*(u64 *)loc = CLEAN_IMM(UITYPE, *(u64 *)loc) |
416 				 ENCODE_UJTYPE_IMM(val - addr);
417 			break;
418 		case R_RISCV_RVC_BRANCH:
419 			*(u32 *)loc = CLEAN_IMM(CBTYPE, *(u32 *)loc) |
420 				 ENCODE_CBTYPE_IMM(val - addr);
421 			break;
422 		case R_RISCV_RVC_JUMP:
423 			*(u32 *)loc = CLEAN_IMM(CJTYPE, *(u32 *)loc) |
424 				 ENCODE_CJTYPE_IMM(val - addr);
425 			break;
426 		case R_RISCV_ADD32:
427 			*(u32 *)loc += val;
428 			break;
429 		case R_RISCV_SUB32:
430 			*(u32 *)loc -= val;
431 			break;
432 		/* It has been applied by R_RISCV_PCREL_HI20 sym */
433 		case R_RISCV_PCREL_LO12_I:
434 		case R_RISCV_ALIGN:
435 		case R_RISCV_RELAX:
436 			break;
437 		default:
438 			pr_err("Unknown rela relocation: %d\n", r_type);
439 			return -ENOEXEC;
440 		}
441 	}
442 	return 0;
443 }
444 
445 const struct kexec_file_ops elf_kexec_ops = {
446 	.probe = kexec_elf_probe,
447 	.load  = elf_kexec_load,
448 };
449