xref: /linux/fs/binfmt_elf.c (revision 61706251492eff650e91c58507bc77e1a12c7fbb)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * linux/fs/binfmt_elf.c
4  *
5  * These are the functions used to load ELF format executables as used
6  * on SVr4 machines.  Information on the format may be found in the book
7  * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
8  * Tools".
9  *
10  * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
11  */
12 
13 #include <linux/module.h>
14 #include <linux/kernel.h>
15 #include <linux/fs.h>
16 #include <linux/log2.h>
17 #include <linux/mm.h>
18 #include <linux/mman.h>
19 #include <linux/errno.h>
20 #include <linux/signal.h>
21 #include <linux/binfmts.h>
22 #include <linux/string.h>
23 #include <linux/file.h>
24 #include <linux/slab.h>
25 #include <linux/personality.h>
26 #include <linux/elfcore.h>
27 #include <linux/init.h>
28 #include <linux/highuid.h>
29 #include <linux/compiler.h>
30 #include <linux/highmem.h>
31 #include <linux/hugetlb.h>
32 #include <linux/pagemap.h>
33 #include <linux/vmalloc.h>
34 #include <linux/security.h>
35 #include <linux/random.h>
36 #include <linux/elf.h>
37 #include <linux/elf-randomize.h>
38 #include <linux/utsname.h>
39 #include <linux/coredump.h>
40 #include <linux/sched.h>
41 #include <linux/sched/coredump.h>
42 #include <linux/sched/task_stack.h>
43 #include <linux/sched/cputime.h>
44 #include <linux/sizes.h>
45 #include <linux/types.h>
46 #include <linux/cred.h>
47 #include <linux/dax.h>
48 #include <linux/uaccess.h>
49 #include <uapi/linux/rseq.h>
50 #include <linux/rseq.h>
51 #include <asm/param.h>
52 #include <asm/page.h>
53 
54 #ifndef ELF_COMPAT
55 #define ELF_COMPAT 0
56 #endif
57 
58 #ifndef user_long_t
59 #define user_long_t long
60 #endif
61 #ifndef user_siginfo_t
62 #define user_siginfo_t siginfo_t
63 #endif
64 
65 /* That's for binfmt_elf_fdpic to deal with */
66 #ifndef elf_check_fdpic
67 #define elf_check_fdpic(ex) false
68 #endif
69 
70 static int load_elf_binary(struct linux_binprm *bprm);
71 
72 /*
73  * If we don't support core dumping, then supply a NULL so we
74  * don't even try.
75  */
76 #ifdef CONFIG_ELF_CORE
77 static int elf_core_dump(struct coredump_params *cprm);
78 #else
79 #define elf_core_dump	NULL
80 #endif
81 
82 #if ELF_EXEC_PAGESIZE > PAGE_SIZE
83 #define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
84 #else
85 #define ELF_MIN_ALIGN	PAGE_SIZE
86 #endif
87 
88 #ifndef ELF_CORE_EFLAGS
89 #define ELF_CORE_EFLAGS	0
90 #endif
91 
92 #define ELF_PAGESTART(_v) ((_v) & ~(int)(ELF_MIN_ALIGN-1))
93 #define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
94 #define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
95 
/*
 * Registration record handed to the binfmt core: how to load an ELF
 * executable and (when core dumps are configured) how to dump one.
 */
static struct linux_binfmt elf_format = {
	.module		= THIS_MODULE,
	.load_binary	= load_elf_binary,
#ifdef CONFIG_COREDUMP
	.core_dump	= elf_core_dump,
	.min_coredump	= ELF_EXEC_PAGESIZE,	/* smallest useful core file */
#endif
};
104 
105 #define BAD_ADDR(x) (unlikely((unsigned long)(x) >= TASK_SIZE))
106 
/*
 * Stash the executable's ELF e_flags in the mm so a later core dump can
 * reproduce them.  No-op unless the architecture selects
 * CONFIG_ARCH_HAS_ELF_CORE_EFLAGS (mm->saved_e_flags only exists then).
 */
static inline void elf_coredump_set_mm_eflags(struct mm_struct *mm, u32 flags)
{
#ifdef CONFIG_ARCH_HAS_ELF_CORE_EFLAGS
	mm->saved_e_flags = flags;
#endif
}
113 
/*
 * Return the e_flags previously saved in @mm when the architecture tracks
 * them; otherwise pass through the caller-supplied default in @flags.
 */
static inline u32 elf_coredump_get_mm_eflags(struct mm_struct *mm, u32 flags)
{
#ifdef CONFIG_ARCH_HAS_ELF_CORE_EFLAGS
	flags = mm->saved_e_flags;
#endif
	return flags;
}
121 
122 /*
123  * We need to explicitly zero any trailing portion of the page that follows
124  * p_filesz when it ends before the page ends (e.g. bss), otherwise this
125  * memory will contain the junk from the file that should not be present.
126  */
padzero(unsigned long address)127 static int padzero(unsigned long address)
128 {
129 	unsigned long nbyte;
130 
131 	nbyte = ELF_PAGEOFFSET(address);
132 	if (nbyte) {
133 		nbyte = ELF_MIN_ALIGN - nbyte;
134 		if (clear_user((void __user *)address, nbyte))
135 			return -EFAULT;
136 	}
137 	return 0;
138 }
139 
140 /* Let's use some macros to make this stack manipulation a little clearer */
141 #ifdef CONFIG_STACK_GROWSUP
142 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
143 #define STACK_ROUND(sp, items) \
144 	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
145 #define STACK_ALLOC(sp, len) ({ \
146 	elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
147 	old_sp; })
148 #else
149 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
150 #define STACK_ROUND(sp, items) \
151 	(((unsigned long) (sp - items)) &~ 15UL)
152 #define STACK_ALLOC(sp, len) (sp -= len)
153 #endif
154 
155 #ifndef ELF_BASE_PLATFORM
156 /*
157  * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
158  * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
159  * will be copied to the user stack in the same manner as AT_PLATFORM.
160  */
161 #define ELF_BASE_PLATFORM NULL
162 #endif
163 
/*
 * create_elf_tables() - build the initial userspace stack for a new ELF
 * process: platform capability strings, the AT_RANDOM seed bytes, the
 * auxiliary vector, argc, and the argv[]/envp[] pointer arrays.
 *
 * @bprm:		binary parameter block of the exec in progress
 * @exec:		ELF header of the binary being loaded
 * @interp_load_addr:	where the interpreter was mapped (becomes AT_BASE)
 * @e_entry:		entry point of the binary itself (becomes AT_ENTRY)
 * @phdr_addr:		user address of the program headers (becomes AT_PHDR)
 *
 * On success bprm->p is left pointing at argc on the freshly populated
 * stack.  Returns 0 on success, -EFAULT/-EINVAL/-EINTR on failure.
 */
static int
create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec,
		unsigned long interp_load_addr,
		unsigned long e_entry, unsigned long phdr_addr)
{
	struct mm_struct *mm = current->mm;
	unsigned long p = bprm->p;
	int argc = bprm->argc;
	int envc = bprm->envc;
	elf_addr_t __user *sp;
	elf_addr_t __user *u_platform;
	elf_addr_t __user *u_base_platform;
	elf_addr_t __user *u_rand_bytes;
	const char *k_platform = ELF_PLATFORM;
	const char *k_base_platform = ELF_BASE_PLATFORM;
	unsigned char k_rand_bytes[16];
	int items;
	elf_addr_t *elf_info;
	elf_addr_t flags = 0;
	int ei_index;
	const struct cred *cred = current_cred();
	struct vm_area_struct *vma;

	/*
	 * In some cases (e.g. Hyper-Threading), we want to avoid L1
	 * evictions by the processes running on the same package. One
	 * thing we can do is to shuffle the initial stack for them.
	 */

	p = arch_align_stack(p);

	/*
	 * If this architecture has a platform capability string, copy it
	 * to userspace.  In some cases (Sparc), this info is impossible
	 * for userspace to get any other way, in others (i386) it is
	 * merely difficult.
	 */
	u_platform = NULL;
	if (k_platform) {
		size_t len = strlen(k_platform) + 1;

		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
		if (copy_to_user(u_platform, k_platform, len))
			return -EFAULT;
	}

	/*
	 * If this architecture has a "base" platform capability
	 * string, copy it to userspace.
	 */
	u_base_platform = NULL;
	if (k_base_platform) {
		size_t len = strlen(k_base_platform) + 1;

		u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
		if (copy_to_user(u_base_platform, k_base_platform, len))
			return -EFAULT;
	}

	/*
	 * Generate 16 random bytes for userspace PRNG seeding.
	 */
	get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
	u_rand_bytes = (elf_addr_t __user *)
		       STACK_ALLOC(p, sizeof(k_rand_bytes));
	if (copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
		return -EFAULT;

	/* Create the ELF interpreter info */
	elf_info = (elf_addr_t *)mm->saved_auxv;
	/* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
#define NEW_AUX_ENT(id, val) \
	do { \
		*elf_info++ = id; \
		*elf_info++ = val; \
	} while (0)

#ifdef ARCH_DLINFO
	/*
	 * ARCH_DLINFO must come first so PPC can do its special alignment of
	 * AUXV.
	 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
	 * ARCH_DLINFO changes
	 */
	ARCH_DLINFO;
#endif
	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
	NEW_AUX_ENT(AT_PHDR, phdr_addr);
	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
	NEW_AUX_ENT(AT_BASE, interp_load_addr);
	if (bprm->interp_flags & BINPRM_FLAGS_PRESERVE_ARGV0)
		flags |= AT_FLAGS_PRESERVE_ARGV0;
	NEW_AUX_ENT(AT_FLAGS, flags);
	NEW_AUX_ENT(AT_ENTRY, e_entry);
	NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
	NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
	NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
	NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
	NEW_AUX_ENT(AT_SECURE, bprm->secureexec);
	NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
#ifdef ELF_HWCAP2
	NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
#endif
#ifdef ELF_HWCAP3
	NEW_AUX_ENT(AT_HWCAP3, ELF_HWCAP3);
#endif
#ifdef ELF_HWCAP4
	NEW_AUX_ENT(AT_HWCAP4, ELF_HWCAP4);
#endif
	NEW_AUX_ENT(AT_EXECFN, bprm->exec);
	if (k_platform) {
		NEW_AUX_ENT(AT_PLATFORM,
			    (elf_addr_t)(unsigned long)u_platform);
	}
	if (k_base_platform) {
		NEW_AUX_ENT(AT_BASE_PLATFORM,
			    (elf_addr_t)(unsigned long)u_base_platform);
	}
	if (bprm->have_execfd) {
		NEW_AUX_ENT(AT_EXECFD, bprm->execfd);
	}
#ifdef CONFIG_RSEQ
	NEW_AUX_ENT(AT_RSEQ_FEATURE_SIZE, offsetof(struct rseq, end));
	NEW_AUX_ENT(AT_RSEQ_ALIGN, rseq_alloc_align());
#endif
#undef NEW_AUX_ENT
	/* AT_NULL is zero; clear the rest too */
	memset(elf_info, 0, (char *)mm->saved_auxv +
			sizeof(mm->saved_auxv) - (char *)elf_info);

	/* And advance past the AT_NULL entry.  */
	elf_info += 2;

	/* Number of elf_addr_t slots the auxv occupies on the stack. */
	ei_index = elf_info - (elf_addr_t *)mm->saved_auxv;
	sp = STACK_ADD(p, ei_index);

	/* argv pointers + NULL, envp pointers + NULL, plus argc itself. */
	items = (argc + 1) + (envc + 1) + 1;
	bprm->p = STACK_ROUND(sp, items);

	/* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
	bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
	sp = (elf_addr_t __user *)bprm->p;
#endif


	/*
	 * Grow the stack manually; some architectures have a limit on how
	 * far ahead a user-space access may be in order to grow the stack.
	 */
	if (mmap_write_lock_killable(mm))
		return -EINTR;
	vma = find_extend_vma_locked(mm, bprm->p);
	mmap_write_unlock(mm);
	if (!vma)
		return -EFAULT;

	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
	if (put_user(argc, sp++))
		return -EFAULT;

	/* Populate list of argv pointers back to argv strings. */
	p = mm->arg_end = mm->arg_start;
	while (argc-- > 0) {
		size_t len;
		if (put_user((elf_addr_t)p, sp++))
			return -EFAULT;
		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
		if (!len || len > MAX_ARG_STRLEN)
			return -EINVAL;
		p += len;
	}
	if (put_user(0, sp++))
		return -EFAULT;
	mm->arg_end = p;

	/* Populate list of envp pointers back to envp strings. */
	mm->env_end = mm->env_start = p;
	while (envc-- > 0) {
		size_t len;
		if (put_user((elf_addr_t)p, sp++))
			return -EFAULT;
		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
		if (!len || len > MAX_ARG_STRLEN)
			return -EINVAL;
		p += len;
	}
	if (put_user(0, sp++))
		return -EFAULT;
	mm->env_end = p;

	/* Put the elf_info on the stack in the right place.  */
	if (copy_to_user(sp, mm->saved_auxv, ei_index * sizeof(elf_addr_t)))
		return -EFAULT;
	return 0;
}
365 
366 /*
367  * Map "eppnt->p_filesz" bytes from "filep" offset "eppnt->p_offset"
368  * into memory at "addr". (Note that p_filesz is rounded up to the
369  * next page, so any extra bytes from the file must be wiped.)
370  */
elf_map(struct file * filep,unsigned long addr,const struct elf_phdr * eppnt,int prot,int type,unsigned long total_size)371 static unsigned long elf_map(struct file *filep, unsigned long addr,
372 		const struct elf_phdr *eppnt, int prot, int type,
373 		unsigned long total_size)
374 {
375 	unsigned long map_addr;
376 	unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
377 	unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
378 	addr = ELF_PAGESTART(addr);
379 	size = ELF_PAGEALIGN(size);
380 
381 	/* mmap() will return -EINVAL if given a zero size, but a
382 	 * segment with zero filesize is perfectly valid */
383 	if (!size)
384 		return addr;
385 
386 	/*
387 	* total_size is the size of the ELF (interpreter) image.
388 	* The _first_ mmap needs to know the full size, otherwise
389 	* randomization might put this image into an overlapping
390 	* position with the ELF binary image. (since size < total_size)
391 	* So we first map the 'big' image - and unmap the remainder at
392 	* the end. (which unmap is needed for ELF images with holes.)
393 	*/
394 	if (total_size) {
395 		total_size = ELF_PAGEALIGN(total_size);
396 		map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
397 		if (!BAD_ADDR(map_addr))
398 			vm_munmap(map_addr+size, total_size-size);
399 	} else
400 		map_addr = vm_mmap(filep, addr, size, prot, type, off);
401 
402 	if ((type & MAP_FIXED_NOREPLACE) &&
403 	    PTR_ERR((void *)map_addr) == -EEXIST)
404 		pr_info("%d (%s): Uhuuh, elf segment at %px requested but the memory is mapped already\n",
405 			task_pid_nr(current), current->comm, (void *)addr);
406 
407 	return(map_addr);
408 }
409 
410 /*
411  * Map "eppnt->p_filesz" bytes from "filep" offset "eppnt->p_offset"
412  * into memory at "addr". Memory from "p_filesz" through "p_memsz"
413  * rounded up to the next page is zeroed.
414  */
static unsigned long elf_load(struct file *filep, unsigned long addr,
		const struct elf_phdr *eppnt, int prot, int type,
		unsigned long total_size)
{
	unsigned long zero_start, zero_end;
	unsigned long map_addr;

	if (eppnt->p_filesz) {
		/* File-backed part of the segment: mmap it first. */
		map_addr = elf_map(filep, addr, eppnt, prot, type, total_size);
		if (BAD_ADDR(map_addr))
			return map_addr;
		if (eppnt->p_memsz > eppnt->p_filesz) {
			/* Bounds of the region beyond p_filesz (e.g. bss). */
			zero_start = map_addr + ELF_PAGEOFFSET(eppnt->p_vaddr) +
				eppnt->p_filesz;
			zero_end = map_addr + ELF_PAGEOFFSET(eppnt->p_vaddr) +
				eppnt->p_memsz;

			/*
			 * Zero the end of the last mapped page but ignore
			 * any errors if the segment isn't writable.
			 */
			if (padzero(zero_start) && (prot & PROT_WRITE))
				return -EFAULT;
		}
	} else {
		/* No file content at all: the whole segment is anonymous. */
		map_addr = zero_start = ELF_PAGESTART(addr);
		zero_end = zero_start + ELF_PAGEOFFSET(eppnt->p_vaddr) +
			eppnt->p_memsz;
	}
	if (eppnt->p_memsz > eppnt->p_filesz) {
		/*
		 * Map the last of the segment.
		 * If the header is requesting these pages to be
		 * executable, honour that (ppc32 needs this).
		 */
		int error;

		zero_start = ELF_PAGEALIGN(zero_start);
		zero_end = ELF_PAGEALIGN(zero_end);

		error = vm_brk_flags(zero_start, zero_end - zero_start,
				     prot & PROT_EXEC ? VM_EXEC : 0);
		if (error)
			map_addr = error;
	}
	return map_addr;
}
462 
463 
total_mapping_size(const struct elf_phdr * phdr,int nr)464 static unsigned long total_mapping_size(const struct elf_phdr *phdr, int nr)
465 {
466 	elf_addr_t min_addr = -1;
467 	elf_addr_t max_addr = 0;
468 	bool pt_load = false;
469 	int i;
470 
471 	for (i = 0; i < nr; i++) {
472 		if (phdr[i].p_type == PT_LOAD) {
473 			min_addr = min(min_addr, ELF_PAGESTART(phdr[i].p_vaddr));
474 			max_addr = max(max_addr, phdr[i].p_vaddr + phdr[i].p_memsz);
475 			pt_load = true;
476 		}
477 	}
478 	return pt_load ? (max_addr - min_addr) : 0;
479 }
480 
/*
 * Read exactly @len bytes at @pos; a short read is reported as -EIO so
 * callers only ever see 0 or a negative errno.
 */
static int elf_read(struct file *file, void *buf, size_t len, loff_t pos)
{
	ssize_t ret = kernel_read(file, buf, len, &pos);

	if (likely(ret == len))
		return 0;

	return ret < 0 ? ret : -EIO;
}
491 
maximum_alignment(struct elf_phdr * cmds,int nr)492 static unsigned long maximum_alignment(struct elf_phdr *cmds, int nr)
493 {
494 	unsigned long alignment = 0;
495 	int i;
496 
497 	for (i = 0; i < nr; i++) {
498 		if (cmds[i].p_type == PT_LOAD) {
499 			unsigned long p_align = cmds[i].p_align;
500 
501 			/* skip non-power of two alignments as invalid */
502 			if (!is_power_of_2(p_align))
503 				continue;
504 			alignment = max(alignment, p_align);
505 		}
506 	}
507 
508 	/* ensure we align to at least one page */
509 	return ELF_PAGEALIGN(alignment);
510 }
511 
512 /**
513  * load_elf_phdrs() - load ELF program headers
514  * @elf_ex:   ELF header of the binary whose program headers should be loaded
515  * @elf_file: the opened ELF binary file
516  *
517  * Loads ELF program headers from the binary file elf_file, which has the ELF
518  * header pointed to by elf_ex, into a newly allocated array. The caller is
519  * responsible for freeing the allocated data. Returns NULL upon failure.
520  */
load_elf_phdrs(const struct elfhdr * elf_ex,struct file * elf_file)521 static struct elf_phdr *load_elf_phdrs(const struct elfhdr *elf_ex,
522 				       struct file *elf_file)
523 {
524 	struct elf_phdr *elf_phdata = NULL;
525 	int retval = -1;
526 	unsigned int size;
527 
528 	/*
529 	 * If the size of this structure has changed, then punt, since
530 	 * we will be doing the wrong thing.
531 	 */
532 	if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
533 		goto out;
534 
535 	/* Sanity check the number of program headers... */
536 	/* ...and their total size. */
537 	size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
538 	if (size == 0 || size > 65536)
539 		goto out;
540 
541 	elf_phdata = kmalloc(size, GFP_KERNEL);
542 	if (!elf_phdata)
543 		goto out;
544 
545 	/* Read in the program headers */
546 	retval = elf_read(elf_file, elf_phdata, size, elf_ex->e_phoff);
547 
548 out:
549 	if (retval) {
550 		kfree(elf_phdata);
551 		elf_phdata = NULL;
552 	}
553 	return elf_phdata;
554 }
555 
556 #ifndef CONFIG_ARCH_BINFMT_ELF_STATE
557 
558 /**
559  * struct arch_elf_state - arch-specific ELF loading state
560  *
561  * This structure is used to preserve architecture specific data during
562  * the loading of an ELF file, throughout the checking of architecture
563  * specific ELF headers & through to the point where the ELF load is
564  * known to be proceeding (ie. SET_PERSONALITY).
565  *
566  * This implementation is a dummy for architectures which require no
567  * specific state.
568  */
struct arch_elf_state {
	/* intentionally empty: no arch-specific state in the generic case */
};

#define INIT_ARCH_ELF_STATE {}
573 
574 /**
575  * arch_elf_pt_proc() - check a PT_LOPROC..PT_HIPROC ELF program header
576  * @ehdr:	The main ELF header
577  * @phdr:	The program header to check
578  * @elf:	The open ELF file
579  * @is_interp:	True if the phdr is from the interpreter of the ELF being
580  *		loaded, else false.
581  * @state:	Architecture-specific state preserved throughout the process
582  *		of loading the ELF.
583  *
584  * Inspects the program header phdr to validate its correctness and/or
585  * suitability for the system. Called once per ELF program header in the
586  * range PT_LOPROC to PT_HIPROC, for both the ELF being loaded and its
587  * interpreter.
588  *
589  * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
590  *         with that return code.
591  */
static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
				   struct elf_phdr *phdr,
				   struct file *elf, bool is_interp,
				   struct arch_elf_state *state)
{
	/* Dummy implementation, always proceed */
	return 0;
}
600 
601 /**
602  * arch_check_elf() - check an ELF executable
603  * @ehdr:	The main ELF header
604  * @has_interp:	True if the ELF has an interpreter, else false.
605  * @interp_ehdr: The interpreter's ELF header
606  * @state:	Architecture-specific state preserved throughout the process
607  *		of loading the ELF.
608  *
609  * Provides a final opportunity for architecture code to reject the loading
610  * of the ELF & cause an exec syscall to return an error. This is called after
611  * all program headers to be checked by arch_elf_pt_proc have been.
612  *
613  * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
614  *         with that return code.
615  */
static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
				 struct elfhdr *interp_ehdr,
				 struct arch_elf_state *state)
{
	/* Dummy implementation, always proceed */
	return 0;
}
623 
624 #endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */
625 
/*
 * Translate ELF segment permission bits (PF_*) into mmap protection bits
 * (PROT_*), then let the architecture veto or adjust the result.
 */
static inline int make_prot(u32 p_flags, struct arch_elf_state *arch_state,
			    bool has_interp, bool is_interp)
{
	int prot;

	prot  = (p_flags & PF_R) ? PROT_READ  : 0;
	prot |= (p_flags & PF_W) ? PROT_WRITE : 0;
	prot |= (p_flags & PF_X) ? PROT_EXEC  : 0;

	return arch_elf_adjust_prot(prot, arch_state, has_interp, is_interp);
}
640 
641 /* This is much more generalized than the library routine read function,
642    so we keep this separate.  Technically the library read function
643    is only provided so that we can read a.out libraries that have
644    an ELF header */
645 
/*
 * load_elf_interp() - map the program interpreter (e.g. ld.so) into the
 * current address space.
 *
 * @interp_elf_ex:	 the interpreter's ELF header
 * @interpreter:	 the open interpreter file
 * @no_base:		 when non-zero and the interpreter is ET_DYN, bias
 *			 the first mapping so p_vaddr 0 lands where mmap
 *			 chooses (load_addr starts at -vaddr)
 * @interp_elf_phdata:	 the interpreter's program headers
 * @arch_state:		 arch state used when computing segment protections
 *
 * Returns the load bias (load_addr) on success; a BAD_ADDR()-failing value
 * or negative errno cast to unsigned long on failure (~0UL for the early
 * consistency-check failures).
 */
static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
		struct file *interpreter,
		unsigned long no_base, struct elf_phdr *interp_elf_phdata,
		struct arch_elf_state *arch_state)
{
	struct elf_phdr *eppnt;
	unsigned long load_addr = 0;
	int load_addr_set = 0;
	unsigned long error = ~0UL;
	unsigned long total_size;
	int i;

	/* First of all, some simple consistency checks */
	if (interp_elf_ex->e_type != ET_EXEC &&
	    interp_elf_ex->e_type != ET_DYN)
		goto out;
	if (!elf_check_arch(interp_elf_ex) ||
	    elf_check_fdpic(interp_elf_ex))
		goto out;
	if (!can_mmap_file(interpreter))
		goto out;

	total_size = total_mapping_size(interp_elf_phdata,
					interp_elf_ex->e_phnum);
	if (!total_size) {
		error = -EINVAL;
		goto out;
	}

	eppnt = interp_elf_phdata;
	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
		if (eppnt->p_type == PT_LOAD) {
			int elf_type = MAP_PRIVATE;
			int elf_prot = make_prot(eppnt->p_flags, arch_state,
						 true, true);
			unsigned long vaddr = 0;
			unsigned long k, map_addr;

			vaddr = eppnt->p_vaddr;
			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
				elf_type |= MAP_FIXED;
			else if (no_base && interp_elf_ex->e_type == ET_DYN)
				load_addr = -vaddr;

			map_addr = elf_load(interpreter, load_addr + vaddr,
					eppnt, elf_prot, elf_type, total_size);
			/* Only the first mapping needs the full image size. */
			total_size = 0;
			error = map_addr;
			if (BAD_ADDR(map_addr))
				goto out;

			/* First ET_DYN mapping fixes the load bias. */
			if (!load_addr_set &&
			    interp_elf_ex->e_type == ET_DYN) {
				load_addr = map_addr - ELF_PAGESTART(vaddr);
				load_addr_set = 1;
			}

			/*
			 * Check to see if the section's size will overflow the
			 * allowed task size. Note that p_filesz must always be
			 * <= p_memsize so it's only necessary to check p_memsz.
			 */
			k = load_addr + eppnt->p_vaddr;
			if (BAD_ADDR(k) ||
			    eppnt->p_filesz > eppnt->p_memsz ||
			    eppnt->p_memsz > TASK_SIZE ||
			    TASK_SIZE - eppnt->p_memsz < k) {
				error = -ENOMEM;
				goto out;
			}
		}
	}

	/* Success: report the bias the interpreter was loaded at. */
	error = load_addr;
out:
	return error;
}
723 
724 /*
725  * These are the functions used to load ELF style executables and shared
726  * libraries.  There is no binary dependent code anywhere else.
727  */
728 
/*
 * parse_elf_property() - decode one entry of an NT_GNU_PROPERTY_TYPE_0 note
 * and hand it to the architecture.
 *
 * @data:	the note descriptor contents
 * @off:	in/out: current offset into @data; advanced past this
 *		property on success
 * @datasz:	total usable size of @data
 * @arch:	arch state updated by arch_parse_elf_property()
 * @have_prev_type: true once at least one property has been parsed
 * @prev_type:	in/out: pr_type of the previous property, used to enforce
 *		that properties are unique and sorted
 *
 * Returns 0 on success, -ENOENT when @off has reached the end of the note,
 * -ENOEXEC on a malformed note, or the arch callback's error.
 */
static int parse_elf_property(const char *data, size_t *off, size_t datasz,
			      struct arch_elf_state *arch,
			      bool have_prev_type, u32 *prev_type)
{
	size_t o, step;
	const struct gnu_property *pr;
	int ret;

	if (*off == datasz)
		return -ENOENT;

	if (WARN_ON_ONCE(*off > datasz || *off % ELF_GNU_PROPERTY_ALIGN))
		return -EIO;
	o = *off;
	datasz -= *off;

	if (datasz < sizeof(*pr))
		return -ENOEXEC;
	pr = (const struct gnu_property *)(data + o);
	o += sizeof(*pr);
	datasz -= sizeof(*pr);

	if (pr->pr_datasz > datasz)
		return -ENOEXEC;

	WARN_ON_ONCE(o % ELF_GNU_PROPERTY_ALIGN);
	step = round_up(pr->pr_datasz, ELF_GNU_PROPERTY_ALIGN);
	if (step > datasz)
		return -ENOEXEC;

	/* Properties are supposed to be unique and sorted on pr_type: */
	if (have_prev_type && pr->pr_type <= *prev_type)
		return -ENOEXEC;
	*prev_type = pr->pr_type;

	ret = arch_parse_elf_property(pr->pr_type, data + o,
				      pr->pr_datasz, ELF_COMPAT, arch);
	if (ret)
		return ret;

	*off = o + step;
	return 0;
}
772 
773 #define NOTE_DATA_SZ SZ_1K
774 #define NOTE_NAME_SZ (sizeof(NN_GNU_PROPERTY_TYPE_0))
775 
/*
 * parse_elf_properties() - read and validate the PT_GNU_PROPERTY segment
 * (an NT_GNU_PROPERTY_TYPE_0 note) and feed each property to the arch code.
 *
 * @f:		the ELF file to read the note from (binary or interpreter)
 * @phdr:	the PT_GNU_PROPERTY program header, or NULL if there is none
 * @arch:	arch state updated as properties are parsed
 *
 * Returns 0 when there is nothing to do or all properties parsed cleanly,
 * -ENOEXEC/-EIO on a malformed note, or an arch callback error.
 */
static int parse_elf_properties(struct file *f, const struct elf_phdr *phdr,
				struct arch_elf_state *arch)
{
	union {
		struct elf_note nhdr;
		char data[NOTE_DATA_SZ];
	} note;
	loff_t pos;
	ssize_t n;
	size_t off, datasz;
	int ret;
	bool have_prev_type;
	u32 prev_type;

	if (!IS_ENABLED(CONFIG_ARCH_USE_GNU_PROPERTY) || !phdr)
		return 0;

	/* load_elf_binary() shouldn't call us unless this is true... */
	if (WARN_ON_ONCE(phdr->p_type != PT_GNU_PROPERTY))
		return -ENOEXEC;

	/* If the properties are crazy large, that's too bad (for now): */
	if (phdr->p_filesz > sizeof(note))
		return -ENOEXEC;

	pos = phdr->p_offset;
	n = kernel_read(f, &note, phdr->p_filesz, &pos);

	BUILD_BUG_ON(sizeof(note) < sizeof(note.nhdr) + NOTE_NAME_SZ);
	if (n < 0 || n < sizeof(note.nhdr) + NOTE_NAME_SZ)
		return -EIO;

	/* Must be a GNU property note with the expected name. */
	if (note.nhdr.n_type != NT_GNU_PROPERTY_TYPE_0 ||
	    note.nhdr.n_namesz != NOTE_NAME_SZ ||
	    strncmp(note.data + sizeof(note.nhdr),
		    NN_GNU_PROPERTY_TYPE_0, n - sizeof(note.nhdr)))
		return -ENOEXEC;

	off = round_up(sizeof(note.nhdr) + NOTE_NAME_SZ,
		       ELF_GNU_PROPERTY_ALIGN);
	if (off > n)
		return -ENOEXEC;

	if (note.nhdr.n_descsz > n - off)
		return -ENOEXEC;
	datasz = off + note.nhdr.n_descsz;

	/* Walk every property until the end of the descriptor or an error. */
	have_prev_type = false;
	do {
		ret = parse_elf_property(note.data, &off, datasz, arch,
					 have_prev_type, &prev_type);
		have_prev_type = true;
	} while (!ret);

	/* -ENOENT just means we walked off the end cleanly. */
	return ret == -ENOENT ? 0 : ret;
}
832 
load_elf_binary(struct linux_binprm * bprm)833 static int load_elf_binary(struct linux_binprm *bprm)
834 {
835 	struct file *interpreter = NULL; /* to shut gcc up */
836 	unsigned long load_bias = 0, phdr_addr = 0;
837 	int first_pt_load = 1;
838 	unsigned long error;
839 	struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
840 	struct elf_phdr *elf_property_phdata = NULL;
841 	unsigned long elf_brk;
842 	bool brk_moved = false;
843 	int retval, i;
844 	unsigned long elf_entry;
845 	unsigned long e_entry;
846 	unsigned long interp_load_addr = 0;
847 	unsigned long start_code, end_code, start_data, end_data;
848 	unsigned long reloc_func_desc __maybe_unused = 0;
849 	int executable_stack = EXSTACK_DEFAULT;
850 	struct elfhdr *elf_ex = (struct elfhdr *)bprm->buf;
851 	struct elfhdr *interp_elf_ex = NULL;
852 	struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
853 	struct mm_struct *mm;
854 	struct pt_regs *regs;
855 
856 	retval = -ENOEXEC;
857 	/* First of all, some simple consistency checks */
858 	if (memcmp(elf_ex->e_ident, ELFMAG, SELFMAG) != 0)
859 		goto out;
860 
861 	if (elf_ex->e_type != ET_EXEC && elf_ex->e_type != ET_DYN)
862 		goto out;
863 	if (!elf_check_arch(elf_ex))
864 		goto out;
865 	if (elf_check_fdpic(elf_ex))
866 		goto out;
867 	if (!can_mmap_file(bprm->file))
868 		goto out;
869 
870 	elf_phdata = load_elf_phdrs(elf_ex, bprm->file);
871 	if (!elf_phdata)
872 		goto out;
873 
874 	elf_ppnt = elf_phdata;
875 	for (i = 0; i < elf_ex->e_phnum; i++, elf_ppnt++) {
876 		char *elf_interpreter;
877 
878 		if (elf_ppnt->p_type == PT_GNU_PROPERTY) {
879 			elf_property_phdata = elf_ppnt;
880 			continue;
881 		}
882 
883 		if (elf_ppnt->p_type != PT_INTERP)
884 			continue;
885 
886 		/*
887 		 * This is the program interpreter used for shared libraries -
888 		 * for now assume that this is an a.out format binary.
889 		 */
890 		retval = -ENOEXEC;
891 		if (elf_ppnt->p_filesz > PATH_MAX || elf_ppnt->p_filesz < 2)
892 			goto out_free_ph;
893 
894 		retval = -ENOMEM;
895 		elf_interpreter = kmalloc(elf_ppnt->p_filesz, GFP_KERNEL);
896 		if (!elf_interpreter)
897 			goto out_free_ph;
898 
899 		retval = elf_read(bprm->file, elf_interpreter, elf_ppnt->p_filesz,
900 				  elf_ppnt->p_offset);
901 		if (retval < 0)
902 			goto out_free_interp;
903 		/* make sure path is NULL terminated */
904 		retval = -ENOEXEC;
905 		if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
906 			goto out_free_interp;
907 
908 		interpreter = open_exec(elf_interpreter);
909 		kfree(elf_interpreter);
910 		retval = PTR_ERR(interpreter);
911 		if (IS_ERR(interpreter))
912 			goto out_free_ph;
913 
914 		/*
915 		 * If the binary is not readable then enforce mm->dumpable = 0
916 		 * regardless of the interpreter's permissions.
917 		 */
918 		would_dump(bprm, interpreter);
919 
920 		interp_elf_ex = kmalloc_obj(*interp_elf_ex);
921 		if (!interp_elf_ex) {
922 			retval = -ENOMEM;
923 			goto out_free_file;
924 		}
925 
926 		/* Get the exec headers */
927 		retval = elf_read(interpreter, interp_elf_ex,
928 				  sizeof(*interp_elf_ex), 0);
929 		if (retval < 0)
930 			goto out_free_dentry;
931 
932 		break;
933 
934 out_free_interp:
935 		kfree(elf_interpreter);
936 		goto out_free_ph;
937 	}
938 
939 	elf_ppnt = elf_phdata;
940 	for (i = 0; i < elf_ex->e_phnum; i++, elf_ppnt++)
941 		switch (elf_ppnt->p_type) {
942 		case PT_GNU_STACK:
943 			if (elf_ppnt->p_flags & PF_X)
944 				executable_stack = EXSTACK_ENABLE_X;
945 			else
946 				executable_stack = EXSTACK_DISABLE_X;
947 			break;
948 
949 		case PT_LOPROC ... PT_HIPROC:
950 			retval = arch_elf_pt_proc(elf_ex, elf_ppnt,
951 						  bprm->file, false,
952 						  &arch_state);
953 			if (retval)
954 				goto out_free_dentry;
955 			break;
956 		}
957 
958 	/* Some simple consistency checks for the interpreter */
959 	if (interpreter) {
960 		retval = -ELIBBAD;
961 		/* Not an ELF interpreter */
962 		if (memcmp(interp_elf_ex->e_ident, ELFMAG, SELFMAG) != 0)
963 			goto out_free_dentry;
964 		/* Verify the interpreter has a valid arch */
965 		if (!elf_check_arch(interp_elf_ex) ||
966 		    elf_check_fdpic(interp_elf_ex))
967 			goto out_free_dentry;
968 
969 		/* Load the interpreter program headers */
970 		interp_elf_phdata = load_elf_phdrs(interp_elf_ex,
971 						   interpreter);
972 		if (!interp_elf_phdata)
973 			goto out_free_dentry;
974 
975 		/* Pass PT_LOPROC..PT_HIPROC headers to arch code */
976 		elf_property_phdata = NULL;
977 		elf_ppnt = interp_elf_phdata;
978 		for (i = 0; i < interp_elf_ex->e_phnum; i++, elf_ppnt++)
979 			switch (elf_ppnt->p_type) {
980 			case PT_GNU_PROPERTY:
981 				elf_property_phdata = elf_ppnt;
982 				break;
983 
984 			case PT_LOPROC ... PT_HIPROC:
985 				retval = arch_elf_pt_proc(interp_elf_ex,
986 							  elf_ppnt, interpreter,
987 							  true, &arch_state);
988 				if (retval)
989 					goto out_free_dentry;
990 				break;
991 			}
992 	}
993 
994 	retval = parse_elf_properties(interpreter ?: bprm->file,
995 				      elf_property_phdata, &arch_state);
996 	if (retval)
997 		goto out_free_dentry;
998 
999 	/*
1000 	 * Allow arch code to reject the ELF at this point, whilst it's
1001 	 * still possible to return an error to the code that invoked
1002 	 * the exec syscall.
1003 	 */
1004 	retval = arch_check_elf(elf_ex,
1005 				!!interpreter, interp_elf_ex,
1006 				&arch_state);
1007 	if (retval)
1008 		goto out_free_dentry;
1009 
1010 	/* Flush all traces of the currently running executable */
1011 	retval = begin_new_exec(bprm);
1012 	if (retval)
1013 		goto out_free_dentry;
1014 
1015 	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
1016 	   may depend on the personality.  */
1017 	SET_PERSONALITY2(*elf_ex, &arch_state);
1018 	if (elf_read_implies_exec(*elf_ex, executable_stack))
1019 		current->personality |= READ_IMPLIES_EXEC;
1020 
1021 	const int snapshot_randomize_va_space = READ_ONCE(randomize_va_space);
1022 	if (!(current->personality & ADDR_NO_RANDOMIZE) && snapshot_randomize_va_space)
1023 		current->flags |= PF_RANDOMIZE;
1024 
1025 	setup_new_exec(bprm);
1026 
1027 	/* Do this so that we can load the interpreter, if need be.  We will
1028 	   change some of these later */
1029 	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
1030 				 executable_stack);
1031 	if (retval < 0)
1032 		goto out_free_dentry;
1033 
1034 	elf_brk = 0;
1035 
1036 	start_code = ~0UL;
1037 	end_code = 0;
1038 	start_data = 0;
1039 	end_data = 0;
1040 
1041 	/* Now we do a little grungy work by mmapping the ELF image into
1042 	   the correct location in memory. */
1043 	for(i = 0, elf_ppnt = elf_phdata;
1044 	    i < elf_ex->e_phnum; i++, elf_ppnt++) {
1045 		int elf_prot, elf_flags;
1046 		unsigned long k, vaddr;
1047 		unsigned long total_size = 0;
1048 		unsigned long alignment;
1049 
1050 		if (elf_ppnt->p_type != PT_LOAD)
1051 			continue;
1052 
1053 		elf_prot = make_prot(elf_ppnt->p_flags, &arch_state,
1054 				     !!interpreter, false);
1055 
1056 		elf_flags = MAP_PRIVATE;
1057 
1058 		vaddr = elf_ppnt->p_vaddr;
1059 		/*
1060 		 * The first time through the loop, first_pt_load is true:
1061 		 * layout will be calculated. Once set, use MAP_FIXED since
1062 		 * we know we've already safely mapped the entire region with
1063 		 * MAP_FIXED_NOREPLACE in the once-per-binary logic following.
1064 		 */
1065 		if (!first_pt_load) {
1066 			elf_flags |= MAP_FIXED;
1067 		} else if (elf_ex->e_type == ET_EXEC) {
1068 			/*
1069 			 * This logic is run once for the first LOAD Program
1070 			 * Header for ET_EXEC binaries. No special handling
1071 			 * is needed.
1072 			 */
1073 			elf_flags |= MAP_FIXED_NOREPLACE;
1074 		} else if (elf_ex->e_type == ET_DYN) {
1075 			/*
1076 			 * This logic is run once for the first LOAD Program
1077 			 * Header for ET_DYN binaries to calculate the
1078 			 * randomization (load_bias) for all the LOAD
1079 			 * Program Headers.
1080 			 */
1081 
1082 			/*
1083 			 * Calculate the entire size of the ELF mapping
1084 			 * (total_size), used for the initial mapping,
1085 			 * due to load_addr_set which is set to true later
1086 			 * once the initial mapping is performed.
1087 			 *
1088 			 * Note that this is only sensible when the LOAD
1089 			 * segments are contiguous (or overlapping). If
1090 			 * used for LOADs that are far apart, this would
1091 			 * cause the holes between LOADs to be mapped,
1092 			 * running the risk of having the mapping fail,
1093 			 * as it would be larger than the ELF file itself.
1094 			 *
1095 			 * As a result, only ET_DYN does this, since
1096 			 * some ET_EXEC (e.g. ia64) may have large virtual
1097 			 * memory holes between LOADs.
1098 			 *
1099 			 */
1100 			total_size = total_mapping_size(elf_phdata,
1101 							elf_ex->e_phnum);
1102 			if (!total_size) {
1103 				retval = -EINVAL;
1104 				goto out_free_dentry;
1105 			}
1106 
1107 			/* Calculate any requested alignment. */
1108 			alignment = maximum_alignment(elf_phdata, elf_ex->e_phnum);
1109 
1110 			/**
1111 			 * DOC: PIE handling
1112 			 *
1113 			 * There are effectively two types of ET_DYN ELF
1114 			 * binaries: programs (i.e. PIE: ET_DYN with
1115 			 * PT_INTERP) and loaders (i.e. static PIE: ET_DYN
1116 			 * without PT_INTERP, usually the ELF interpreter
1117 			 * itself). Loaders must be loaded away from programs
1118 			 * since the program may otherwise collide with the
1119 			 * loader (especially for ET_EXEC which does not have
1120 			 * a randomized position).
1121 			 *
1122 			 * For example, to handle invocations of
1123 			 * "./ld.so someprog" to test out a new version of
1124 			 * the loader, the subsequent program that the
1125 			 * loader loads must avoid the loader itself, so
1126 			 * they cannot share the same load range. Sufficient
1127 			 * room for the brk must be allocated with the
1128 			 * loader as well, since brk must be available with
1129 			 * the loader.
1130 			 *
1131 			 * Therefore, programs are loaded offset from
1132 			 * ELF_ET_DYN_BASE and loaders are loaded into the
1133 			 * independently randomized mmap region (0 load_bias
1134 			 * without MAP_FIXED nor MAP_FIXED_NOREPLACE).
1135 			 *
1136 			 * See below for "brk" handling details, which is
1137 			 * also affected by program vs loader and ASLR.
1138 			 */
1139 			if (interpreter) {
1140 				/* On ET_DYN with PT_INTERP, we do the ASLR. */
1141 				load_bias = ELF_ET_DYN_BASE;
1142 				if (current->flags & PF_RANDOMIZE)
1143 					load_bias += arch_mmap_rnd();
1144 				/* Adjust alignment as requested. */
1145 				if (alignment)
1146 					load_bias &= ~(alignment - 1);
1147 				elf_flags |= MAP_FIXED_NOREPLACE;
1148 			} else {
1149 				/*
1150 				 * For ET_DYN without PT_INTERP, we rely on
				 * the architecture's (potentially ASLR) mmap
1152 				 * base address (via a load_bias of 0).
1153 				 *
1154 				 * When a large alignment is requested, we
1155 				 * must do the allocation at address "0" right
1156 				 * now to discover where things will load so
1157 				 * that we can adjust the resulting alignment.
1158 				 * In this case (load_bias != 0), we can use
1159 				 * MAP_FIXED_NOREPLACE to make sure the mapping
1160 				 * doesn't collide with anything.
1161 				 */
1162 				if (alignment > ELF_MIN_ALIGN) {
1163 					load_bias = elf_load(bprm->file, 0, elf_ppnt,
1164 							     elf_prot, elf_flags, total_size);
1165 					if (BAD_ADDR(load_bias)) {
1166 						retval = IS_ERR_VALUE(load_bias) ?
1167 							 PTR_ERR((void*)load_bias) : -EINVAL;
1168 						goto out_free_dentry;
1169 					}
1170 					vm_munmap(load_bias, total_size);
1171 					/* Adjust alignment as requested. */
1172 					if (alignment)
1173 						load_bias &= ~(alignment - 1);
1174 					elf_flags |= MAP_FIXED_NOREPLACE;
1175 				} else
1176 					load_bias = 0;
1177 			}
1178 
1179 			/*
1180 			 * Since load_bias is used for all subsequent loading
1181 			 * calculations, we must lower it by the first vaddr
1182 			 * so that the remaining calculations based on the
1183 			 * ELF vaddrs will be correctly offset. The result
1184 			 * is then page aligned.
1185 			 */
1186 			load_bias = ELF_PAGESTART(load_bias - vaddr);
1187 		}
1188 
1189 		error = elf_load(bprm->file, load_bias + vaddr, elf_ppnt,
1190 				elf_prot, elf_flags, total_size);
1191 		if (BAD_ADDR(error)) {
1192 			retval = IS_ERR_VALUE(error) ?
1193 				PTR_ERR((void*)error) : -EINVAL;
1194 			goto out_free_dentry;
1195 		}
1196 
1197 		if (first_pt_load) {
1198 			first_pt_load = 0;
1199 			if (elf_ex->e_type == ET_DYN) {
1200 				load_bias += error -
1201 				             ELF_PAGESTART(load_bias + vaddr);
1202 				reloc_func_desc = load_bias;
1203 			}
1204 		}
1205 
1206 		/*
1207 		 * Figure out which segment in the file contains the Program
1208 		 * Header table, and map to the associated memory address.
1209 		 */
1210 		if (elf_ppnt->p_offset <= elf_ex->e_phoff &&
1211 		    elf_ex->e_phoff < elf_ppnt->p_offset + elf_ppnt->p_filesz) {
1212 			phdr_addr = elf_ex->e_phoff - elf_ppnt->p_offset +
1213 				    elf_ppnt->p_vaddr;
1214 		}
1215 
1216 		k = elf_ppnt->p_vaddr;
1217 		if ((elf_ppnt->p_flags & PF_X) && k < start_code)
1218 			start_code = k;
1219 		if (start_data < k)
1220 			start_data = k;
1221 
1222 		/*
1223 		 * Check to see if the section's size will overflow the
1224 		 * allowed task size. Note that p_filesz must always be
1225 		 * <= p_memsz so it is only necessary to check p_memsz.
1226 		 */
1227 		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
1228 		    elf_ppnt->p_memsz > TASK_SIZE ||
1229 		    TASK_SIZE - elf_ppnt->p_memsz < k) {
1230 			/* set_brk can never work. Avoid overflows. */
1231 			retval = -EINVAL;
1232 			goto out_free_dentry;
1233 		}
1234 
1235 		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
1236 
1237 		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
1238 			end_code = k;
1239 		if (end_data < k)
1240 			end_data = k;
1241 		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
1242 		if (k > elf_brk)
1243 			elf_brk = k;
1244 	}
1245 
1246 	e_entry = elf_ex->e_entry + load_bias;
1247 	phdr_addr += load_bias;
1248 	elf_brk += load_bias;
1249 	start_code += load_bias;
1250 	end_code += load_bias;
1251 	start_data += load_bias;
1252 	end_data += load_bias;
1253 
1254 	if (interpreter) {
1255 		elf_entry = load_elf_interp(interp_elf_ex,
1256 					    interpreter,
1257 					    load_bias, interp_elf_phdata,
1258 					    &arch_state);
1259 		if (!IS_ERR_VALUE(elf_entry)) {
1260 			/*
1261 			 * load_elf_interp() returns relocation
1262 			 * adjustment
1263 			 */
1264 			interp_load_addr = elf_entry;
1265 			elf_entry += interp_elf_ex->e_entry;
1266 		}
1267 		if (BAD_ADDR(elf_entry)) {
1268 			retval = IS_ERR_VALUE(elf_entry) ?
1269 					(int)elf_entry : -EINVAL;
1270 			goto out_free_dentry;
1271 		}
1272 		reloc_func_desc = interp_load_addr;
1273 
1274 		exe_file_allow_write_access(interpreter);
1275 		fput(interpreter);
1276 
1277 		kfree(interp_elf_ex);
1278 		kfree(interp_elf_phdata);
1279 	} else {
1280 		elf_entry = e_entry;
1281 		if (BAD_ADDR(elf_entry)) {
1282 			retval = -EINVAL;
1283 			goto out_free_dentry;
1284 		}
1285 	}
1286 
1287 	kfree(elf_phdata);
1288 
1289 	set_binfmt(&elf_format);
1290 
1291 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
1292 	retval = ARCH_SETUP_ADDITIONAL_PAGES(bprm, elf_ex, !!interpreter);
1293 	if (retval < 0)
1294 		goto out;
1295 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
1296 
1297 	retval = create_elf_tables(bprm, elf_ex, interp_load_addr,
1298 				   e_entry, phdr_addr);
1299 	if (retval < 0)
1300 		goto out;
1301 
1302 	mm = current->mm;
1303 	mm->end_code = end_code;
1304 	mm->start_code = start_code;
1305 	mm->start_data = start_data;
1306 	mm->end_data = end_data;
1307 	mm->start_stack = bprm->p;
1308 
1309 	elf_coredump_set_mm_eflags(mm, elf_ex->e_flags);
1310 
1311 	/**
1312 	 * DOC: "brk" handling
1313 	 *
1314 	 * For architectures with ELF randomization, when executing a
1315 	 * loader directly (i.e. static PIE: ET_DYN without PT_INTERP),
1316 	 * move the brk area out of the mmap region and into the unused
1317 	 * ELF_ET_DYN_BASE region. Since "brk" grows up it may collide
1318 	 * early with the stack growing down or other regions being put
1319 	 * into the mmap region by the kernel (e.g. vdso).
1320 	 *
1321 	 * In the CONFIG_COMPAT_BRK case, though, everything is turned
1322 	 * off because we're not allowed to move the brk at all.
1323 	 */
1324 	if (!IS_ENABLED(CONFIG_COMPAT_BRK) &&
1325 	    IS_ENABLED(CONFIG_ARCH_HAS_ELF_RANDOMIZE) &&
1326 	    elf_ex->e_type == ET_DYN && !interpreter) {
1327 		elf_brk = ELF_ET_DYN_BASE;
1328 		/* This counts as moving the brk, so let brk(2) know. */
1329 		brk_moved = true;
1330 	}
1331 	mm->start_brk = mm->brk = ELF_PAGEALIGN(elf_brk);
1332 
1333 	if ((current->flags & PF_RANDOMIZE) && snapshot_randomize_va_space > 1) {
1334 		/*
1335 		 * If we didn't move the brk to ELF_ET_DYN_BASE (above),
1336 		 * leave a gap between .bss and brk.
1337 		 */
1338 		if (!brk_moved)
1339 			mm->brk = mm->start_brk = mm->brk + PAGE_SIZE;
1340 
1341 		mm->brk = mm->start_brk = arch_randomize_brk(mm);
1342 		brk_moved = true;
1343 	}
1344 
1345 #ifdef compat_brk_randomized
1346 	if (brk_moved)
1347 		current->brk_randomized = 1;
1348 #endif
1349 
1350 	if (current->personality & MMAP_PAGE_ZERO) {
1351 		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
1352 		   and some applications "depend" upon this behavior.
1353 		   Since we do not have the power to recompile these, we
1354 		   emulate the SVr4 behavior. Sigh. */
1355 		error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
1356 				MAP_FIXED | MAP_PRIVATE, 0);
1357 
1358 		retval = do_mseal(0, PAGE_SIZE, 0);
1359 		if (retval)
1360 			pr_warn_ratelimited("pid=%d, couldn't seal address 0, ret=%d.\n",
1361 					    task_pid_nr(current), retval);
1362 	}
1363 
1364 	regs = current_pt_regs();
1365 #ifdef ELF_PLAT_INIT
1366 	/*
1367 	 * The ABI may specify that certain registers be set up in special
1368 	 * ways (on i386 %edx is the address of a DT_FINI function, for
	 * example).  In addition, it may also specify (eg, PowerPC64 ELF)
1370 	 * that the e_entry field is the address of the function descriptor
1371 	 * for the startup routine, rather than the address of the startup
1372 	 * routine itself.  This macro performs whatever initialization to
1373 	 * the regs structure is required as well as any relocations to the
	 * function descriptor entries when executing dynamically linked apps.
1375 	 */
1376 	ELF_PLAT_INIT(regs, reloc_func_desc);
1377 #endif
1378 
1379 	finalize_exec(bprm);
1380 	START_THREAD(elf_ex, regs, elf_entry, bprm->p);
1381 	retval = 0;
1382 out:
1383 	return retval;
1384 
1385 	/* error cleanup */
1386 out_free_dentry:
1387 	kfree(interp_elf_ex);
1388 	kfree(interp_elf_phdata);
1389 out_free_file:
1390 	exe_file_allow_write_access(interpreter);
1391 	if (interpreter)
1392 		fput(interpreter);
1393 out_free_ph:
1394 	kfree(elf_phdata);
1395 	goto out;
1396 }
1397 
1398 #ifdef CONFIG_ELF_CORE
1399 /*
1400  * ELF core dumper
1401  *
1402  * Modelled on fs/exec.c:aout_core_dump()
1403  * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1404  */
1405 
/*
 * An ELF note held in memory before it is serialized by writenote().
 * @data ownership is not uniform: some callers point it at embedded
 * storage (e.g. &t->prstatus), others at heap allocations;
 * free_note_info() tells the two apart.
 */
struct memelfnote
{
	const char *name;	/* note name, e.g. NN_PRSTATUS; never freed here */
	int type;		/* n_type value, e.g. NT_PRSTATUS */
	unsigned int datasz;	/* payload size in bytes, before 4-byte padding */
	void *data;		/* payload; ownership varies by caller */
};
1414 
notesize(struct memelfnote * en)1415 static int notesize(struct memelfnote *en)
1416 {
1417 	int sz;
1418 
1419 	sz = sizeof(struct elf_note);
1420 	sz += roundup(strlen(en->name) + 1, 4);
1421 	sz += roundup(en->datasz, 4);
1422 
1423 	return sz;
1424 }
1425 
writenote(struct memelfnote * men,struct coredump_params * cprm)1426 static int writenote(struct memelfnote *men, struct coredump_params *cprm)
1427 {
1428 	struct elf_note en;
1429 	en.n_namesz = strlen(men->name) + 1;
1430 	en.n_descsz = men->datasz;
1431 	en.n_type = men->type;
1432 
1433 	return dump_emit(cprm, &en, sizeof(en)) &&
1434 	    dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
1435 	    dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
1436 }
1437 
/*
 * Initialize the ELF core-file header (ET_CORE): identification bytes,
 * the caller-supplied machine/flags, and program header table geometry
 * for @segs segments placed immediately after this header.
 */
static void fill_elf_header(struct elfhdr *elf, int segs,
			    u16 machine, u32 flags)
{
	/* Compound literal zero-fills every field not named below. */
	*elf = (struct elfhdr) {
		.e_type		= ET_CORE,
		.e_machine	= machine,
		.e_version	= EV_CURRENT,
		.e_phoff	= sizeof(struct elfhdr),
		.e_flags	= flags,
		.e_ehsize	= sizeof(struct elfhdr),
		.e_phentsize	= sizeof(struct elf_phdr),
		.e_phnum	= segs,
	};

	memcpy(elf->e_ident, ELFMAG, SELFMAG);
	elf->e_ident[EI_CLASS] = ELF_CLASS;
	elf->e_ident[EI_DATA] = ELF_DATA;
	elf->e_ident[EI_VERSION] = EV_CURRENT;
	elf->e_ident[EI_OSABI] = ELF_OSABI;
}
1458 
/*
 * Describe the note segment in a PT_NOTE program header: @sz bytes of
 * file data at @offset, with no memory image (p_memsz stays 0).
 */
static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
{
	/* Unnamed fields (p_vaddr, p_paddr, p_memsz, p_flags) are zeroed. */
	*phdr = (struct elf_phdr) {
		.p_type   = PT_NOTE,
		.p_offset = offset,
		.p_filesz = sz,
		.p_align  = 4,
	};
}
1470 
__fill_note(struct memelfnote * note,const char * name,int type,unsigned int sz,void * data)1471 static void __fill_note(struct memelfnote *note, const char *name, int type,
1472 			unsigned int sz, void *data)
1473 {
1474 	note->name = name;
1475 	note->type = type;
1476 	note->datasz = sz;
1477 	note->data = data;
1478 }
1479 
/*
 * Convenience wrapper: derive both the note name (NN_<type>) and the
 * n_type value (NT_<type>) from one token, e.g. fill_note(&n, PRSTATUS,
 * sz, d) expands to __fill_note(&n, NN_PRSTATUS, NT_PRSTATUS, sz, d).
 */
#define fill_note(note, type, sz, data) \
	__fill_note(note, NN_ ## type, NT_ ## type, sz, data)
1482 
/*
 * fill up all the fields in prstatus from the given task struct, except
 * registers which need to be filled up separately.
 *
 * @signr is the signal causing the dump; it is recorded in both
 * pr_info.si_signo and pr_cursig.  Ids come from the task_*_vnr()
 * helpers.  A thread-group leader reports group-wide CPU totals;
 * any other thread reports only its own time.
 */
static void fill_prstatus(struct elf_prstatus_common *prstatus,
		struct task_struct *p, long signr)
{
	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
	prstatus->pr_sigpend = p->pending.signal.sig[0];
	prstatus->pr_sighold = p->blocked.sig[0];
	rcu_read_lock();
	/* real_parent is RCU-protected; resolve its pid under the lock */
	prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
	rcu_read_unlock();
	prstatus->pr_pid = task_pid_vnr(p);
	prstatus->pr_pgrp = task_pgrp_vnr(p);
	prstatus->pr_sid = task_session_vnr(p);
	if (thread_group_leader(p)) {
		struct task_cputime cputime;

		/*
		 * This is the record for the group leader.  It shows the
		 * group-wide total, not its individual thread total.
		 */
		thread_group_cputime(p, &cputime);
		prstatus->pr_utime = ns_to_kernel_old_timeval(cputime.utime);
		prstatus->pr_stime = ns_to_kernel_old_timeval(cputime.stime);
	} else {
		u64 utime, stime;

		task_cputime(p, &utime, &stime);
		prstatus->pr_utime = ns_to_kernel_old_timeval(utime);
		prstatus->pr_stime = ns_to_kernel_old_timeval(stime);
	}

	/* Cumulative times of reaped children, tracked in the signal struct */
	prstatus->pr_cutime = ns_to_kernel_old_timeval(p->signal->cutime);
	prstatus->pr_cstime = ns_to_kernel_old_timeval(p->signal->cstime);
}
1520 
/*
 * Fill the NT_PRPSINFO payload: the command line (copied from the
 * dumping process's user memory between mm->arg_start and mm->arg_end),
 * ids from the task_*_vnr() helpers, scheduling state, credentials and
 * the command name.
 *
 * Returns 0 on success, -EFAULT if the argument area cannot be copied
 * from user space.
 */
static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
		       struct mm_struct *mm)
{
	const struct cred *cred;
	unsigned int i, len;
	unsigned int state;

	/* first copy the parameters from user space */
	memset(psinfo, 0, sizeof(struct elf_prpsinfo));

	/* Clamp to the fixed pr_psargs buffer, leaving room for the NUL */
	len = mm->arg_end - mm->arg_start;
	if (len >= ELF_PRARGSZ)
		len = ELF_PRARGSZ-1;
	if (copy_from_user(&psinfo->pr_psargs,
		           (const char __user *)mm->arg_start, len))
		return -EFAULT;
	/* Arguments are NUL-separated in memory; present them space-separated */
	for(i = 0; i < len; i++)
		if (psinfo->pr_psargs[i] == 0)
			psinfo->pr_psargs[i] = ' ';
	psinfo->pr_psargs[len] = 0;

	rcu_read_lock();
	/* real_parent is RCU-protected; resolve its pid under the lock */
	psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
	rcu_read_unlock();
	psinfo->pr_pid = task_pid_vnr(p);
	psinfo->pr_pgrp = task_pgrp_vnr(p);
	psinfo->pr_sid = task_session_vnr(p);

	/* Map the __state bitmask to an index into the "RSDTZW" letters */
	state = READ_ONCE(p->__state);
	i = state ? ffz(~state) + 1 : 0;
	psinfo->pr_state = i;
	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
	psinfo->pr_nice = task_nice(p);
	psinfo->pr_flag = p->flags;
	rcu_read_lock();
	cred = __task_cred(p);
	SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
	SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
	rcu_read_unlock();
	get_task_comm(psinfo->pr_fname, p);

	return 0;
}
1565 
fill_auxv_note(struct memelfnote * note,struct mm_struct * mm)1566 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1567 {
1568 	elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1569 	int i = 0;
1570 	do
1571 		i += 2;
1572 	while (auxv[i - 2] != AT_NULL);
1573 	fill_note(note, AUXV, i * sizeof(elf_addr_t), auxv);
1574 }
1575 
/*
 * Build the NT_SIGINFO note: convert the in-kernel siginfo that caused
 * the dump into the userspace layout in @csigdata.  The note only
 * points at @csigdata, so that storage must outlive the note.
 */
static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
		const kernel_siginfo_t *siginfo)
{
	copy_siginfo_to_external(csigdata, siginfo);
	fill_note(note, SIGINFO, sizeof(*csigdata), csigdata);
}
1582 
1583 /*
1584  * Format of NT_FILE note:
1585  *
1586  * long count     -- how many files are mapped
1587  * long page_size -- units for file_ofs
1588  * array of [COUNT] elements of
1589  *   long start
1590  *   long end
1591  *   long file_ofs
1592  * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
1593  */
fill_files_note(struct memelfnote * note,struct coredump_params * cprm)1594 static int fill_files_note(struct memelfnote *note, struct coredump_params *cprm)
1595 {
1596 	unsigned count, size, names_ofs, remaining, n;
1597 	user_long_t *data;
1598 	user_long_t *start_end_ofs;
1599 	char *name_base, *name_curpos;
1600 	int i;
1601 
1602 	/* *Estimated* file count and total data size needed */
1603 	count = cprm->vma_count;
1604 	if (count > UINT_MAX / 64)
1605 		return -EINVAL;
1606 	size = count * 64;
1607 
1608 	names_ofs = (2 + 3 * count) * sizeof(data[0]);
1609  alloc:
1610 	/* paranoia check */
1611 	if (size >= core_file_note_size_limit) {
1612 		pr_warn_once("coredump Note size too large: %u (does kernel.core_file_note_size_limit sysctl need adjustment?\n",
1613 			      size);
1614 		return -EINVAL;
1615 	}
1616 	size = round_up(size, PAGE_SIZE);
1617 	/*
1618 	 * "size" can be 0 here legitimately.
1619 	 * Let it ENOMEM and omit NT_FILE section which will be empty anyway.
1620 	 */
1621 	data = kvmalloc(size, GFP_KERNEL);
1622 	if (ZERO_OR_NULL_PTR(data))
1623 		return -ENOMEM;
1624 
1625 	start_end_ofs = data + 2;
1626 	name_base = name_curpos = ((char *)data) + names_ofs;
1627 	remaining = size - names_ofs;
1628 	count = 0;
1629 	for (i = 0; i < cprm->vma_count; i++) {
1630 		struct core_vma_metadata *m = &cprm->vma_meta[i];
1631 		struct file *file;
1632 		const char *filename;
1633 
1634 		file = m->file;
1635 		if (!file)
1636 			continue;
1637 		filename = file_path(file, name_curpos, remaining);
1638 		if (IS_ERR(filename)) {
1639 			if (PTR_ERR(filename) == -ENAMETOOLONG) {
1640 				kvfree(data);
1641 				size = size * 5 / 4;
1642 				goto alloc;
1643 			}
1644 			continue;
1645 		}
1646 
1647 		/* file_path() fills at the end, move name down */
1648 		/* n = strlen(filename) + 1: */
1649 		n = (name_curpos + remaining) - filename;
1650 		remaining = filename - name_curpos;
1651 		memmove(name_curpos, filename, n);
1652 		name_curpos += n;
1653 
1654 		*start_end_ofs++ = m->start;
1655 		*start_end_ofs++ = m->end;
1656 		*start_end_ofs++ = m->pgoff;
1657 		count++;
1658 	}
1659 
1660 	/* Now we know exact count of files, can store it */
1661 	data[0] = count;
1662 	data[1] = PAGE_SIZE;
1663 	/*
1664 	 * Count usually is less than mm->map_count,
1665 	 * we need to move filenames down.
1666 	 */
1667 	n = cprm->vma_count - count;
1668 	if (n != 0) {
1669 		unsigned shift_bytes = n * 3 * sizeof(data[0]);
1670 		memmove(name_base - shift_bytes, name_base,
1671 			name_curpos - name_base);
1672 		name_curpos -= shift_bytes;
1673 	}
1674 
1675 	size = name_curpos - (char *)data;
1676 	fill_note(note, FILE, size, data);
1677 	return 0;
1678 }
1679 
1680 #include <linux/regset.h>
1681 
/*
 * Per-thread state collected for the core dump.  notes[0] is always
 * NT_PRSTATUS and points at the embedded @prstatus; the remaining
 * slots (info->thread_notes in total) carry heap-allocated payloads
 * that free_note_info() releases.
 */
struct elf_thread_core_info {
	struct elf_thread_core_info *next;	/* singly linked thread list */
	struct task_struct *task;		/* the thread being described */
	struct elf_prstatus prstatus;		/* backing store for notes[0] */
	struct memelfnote notes[];		/* flexible array of note slots */
};
1688 
/*
 * Everything needed to write the note segment of a core dump: the
 * per-thread list (dumping thread first), the process-wide notes, the
 * running size total, and the number of note slots per thread.
 */
struct elf_note_info {
	struct elf_thread_core_info *thread;	/* head = the dumping thread */
	struct memelfnote psinfo;	/* NT_PRPSINFO */
	struct memelfnote signote;	/* NT_SIGINFO */
	struct memelfnote auxv;		/* NT_AUXV */
	struct memelfnote files;	/* NT_FILE; data == NULL if omitted */
	user_siginfo_t csigdata;	/* backing storage for signote */
	size_t size;			/* accumulated size of all notes */
	int thread_notes;		/* slots in each thread's notes[] */
};
1699 
1700 #ifdef CORE_DUMP_USE_REGSET
1701 /*
1702  * When a regset has a writeback hook, we call it on each thread before
1703  * dumping user memory.  On register window machines, this makes sure the
1704  * user memory backing the register data is up to date before we read it.
1705  */
do_thread_regset_writeback(struct task_struct * task,const struct user_regset * regset)1706 static void do_thread_regset_writeback(struct task_struct *task,
1707 				       const struct user_regset *regset)
1708 {
1709 	if (regset->writeback)
1710 		regset->writeback(task, regset, 1);
1711 }
1712 
1713 #ifndef PRSTATUS_SIZE
1714 #define PRSTATUS_SIZE sizeof(struct elf_prstatus)
1715 #endif
1716 
1717 #ifndef SET_PR_FPVALID
1718 #define SET_PR_FPVALID(S) ((S)->pr_fpvalid = 1)
1719 #endif
1720 
/*
 * Collect the register-state notes for one thread @t.  Slot 0 is
 * NT_PRSTATUS with regset 0 read into pr_reg; every further regset
 * that is active and has a core_note_type gets its own note, with a
 * payload allocated by regset_get_alloc() (released later by
 * free_note_info()).  Sizes of all emitted notes are accumulated into
 * info->size.  Always returns 1.
 */
static int fill_thread_core_info(struct elf_thread_core_info *t,
				 const struct user_regset_view *view,
				 long signr, struct elf_note_info *info)
{
	unsigned int note_iter, view_iter;

	/*
	 * NT_PRSTATUS is the one special case, because the regset data
	 * goes into the pr_reg field inside the note contents, rather
	 * than being the whole note contents.  We fill the regset in here.
	 * We assume that regset 0 is NT_PRSTATUS.
	 */
	fill_prstatus(&t->prstatus.common, t->task, signr);
	regset_get(t->task, &view->regsets[0],
		   sizeof(t->prstatus.pr_reg), &t->prstatus.pr_reg);

	fill_note(&t->notes[0], PRSTATUS, PRSTATUS_SIZE, &t->prstatus);
	info->size += notesize(&t->notes[0]);

	do_thread_regset_writeback(t->task, &view->regsets[0]);

	/*
	 * Each other regset might generate a note too.  For each regset
	 * that has no core_note_type or is inactive, skip it.
	 */
	note_iter = 1;
	for (view_iter = 1; view_iter < view->n; ++view_iter) {
		const struct user_regset *regset = &view->regsets[view_iter];
		int note_type = regset->core_note_type;
		const char *note_name = regset->core_note_name;
		bool is_fpreg = note_type == NT_PRFPREG;
		void *data;
		int ret;

		do_thread_regset_writeback(t->task, regset);
		if (!note_type) // not for coredumps
			continue;
		if (regset->active && regset->active(t->task, regset) <= 0)
			continue;

		/* A failed regset read drops this note, not the whole dump */
		ret = regset_get_alloc(t->task, regset, ~0U, &data);
		if (ret < 0)
			continue;

		/* Never write past the notes[] slots counted in fill_note_info() */
		if (WARN_ON_ONCE(note_iter >= info->thread_notes))
			break;

		if (is_fpreg)
			SET_PR_FPVALID(&t->prstatus);

		/* There should be a note name, but if not, guess: */
		if (WARN_ON_ONCE(!note_name))
			note_name = "LINUX";
		else
			/* Warn on non-legacy-compatible names, for now. */
			WARN_ON_ONCE(strcmp(note_name,
					    is_fpreg ? "CORE" : "LINUX"));

		__fill_note(&t->notes[note_iter], note_name, note_type,
			    ret, data);

		info->size += notesize(&t->notes[note_iter]);
		note_iter++;
	}

	return 1;
}
1788 #else
fill_thread_core_info(struct elf_thread_core_info * t,const struct user_regset_view * view,long signr,struct elf_note_info * info)1789 static int fill_thread_core_info(struct elf_thread_core_info *t,
1790 				 const struct user_regset_view *view,
1791 				 long signr, struct elf_note_info *info)
1792 {
1793 	struct task_struct *p = t->task;
1794 	elf_fpregset_t *fpu;
1795 
1796 	fill_prstatus(&t->prstatus.common, p, signr);
1797 	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1798 
1799 	fill_note(&t->notes[0], PRSTATUS, sizeof(t->prstatus), &t->prstatus);
1800 	info->size += notesize(&t->notes[0]);
1801 
1802 	fpu = kzalloc_obj(elf_fpregset_t);
1803 	if (!fpu || !elf_core_copy_task_fpregs(p, fpu)) {
1804 		kfree(fpu);
1805 		return 1;
1806 	}
1807 
1808 	t->prstatus.pr_fpvalid = 1;
1809 	fill_note(&t->notes[1], PRFPREG, sizeof(*fpu), fpu);
1810 	info->size += notesize(&t->notes[1]);
1811 
1812 	return 1;
1813 }
1814 #endif
1815 
/*
 * Build everything needed for the note segment: the process-wide notes
 * (PRPSINFO, SIGINFO, AUXV and, when it can be built, FILE), one
 * elf_thread_core_info per thread in the core state, and the ELF file
 * header itself.
 *
 * Returns 1 on success, 0 on failure.  Allocations already attached to
 * @info are not unwound here; free_note_info() releases them.
 */
static int fill_note_info(struct elfhdr *elf, int phdrs,
			  struct elf_note_info *info,
			  struct coredump_params *cprm)
{
	struct task_struct *dump_task = current;
	const struct user_regset_view *view;
	struct elf_thread_core_info *t;
	struct elf_prpsinfo *psinfo;
	struct core_thread *ct;
	u16 machine;
	u32 flags;

	psinfo = kmalloc_obj(*psinfo);
	if (!psinfo)
		return 0;
	/* Attach immediately so free_note_info() can free it on any path */
	fill_note(&info->psinfo, PRPSINFO, sizeof(*psinfo), psinfo);

#ifdef CORE_DUMP_USE_REGSET
	view = task_user_regset_view(dump_task);

	/*
	 * Figure out how many notes we're going to need for each thread.
	 */
	info->thread_notes = 0;
	for (int i = 0; i < view->n; ++i)
		if (view->regsets[i].core_note_type != 0)
			++info->thread_notes;

	/*
	 * Sanity check.  We rely on regset 0 being in NT_PRSTATUS,
	 * since it is our one special case.
	 */
	if (unlikely(info->thread_notes == 0) ||
	    unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
		WARN_ON(1);
		return 0;
	}

	machine = view->e_machine;
	flags = view->e_flags;
#else
	view = NULL;
	/* Without regsets: exactly NT_PRSTATUS + NT_PRFPREG per thread */
	info->thread_notes = 2;
	machine = ELF_ARCH;
	flags = ELF_CORE_EFLAGS;
#endif

	/*
	 * Override ELF e_flags with value taken from process,
	 * if arch needs that.
	 */
	flags = elf_coredump_get_mm_eflags(dump_task->mm, flags);

	/*
	 * Initialize the ELF file header.
	 */
	fill_elf_header(elf, phdrs, machine, flags);

	/*
	 * Allocate a structure for each thread.  The dumping task is the
	 * list head; the remaining threads are inserted right after it.
	 */
	info->thread = kzalloc_flex(*info->thread, notes, info->thread_notes);
	if (unlikely(!info->thread))
		return 0;

	info->thread->task = dump_task;
	for (ct = dump_task->signal->core_state->dumper.next; ct; ct = ct->next) {
		t = kzalloc_flex(*t, notes, info->thread_notes);
		if (unlikely(!t))
			return 0;

		t->task = ct->task;
		t->next = info->thread->next;
		info->thread->next = t;
	}

	/*
	 * Now fill in each thread's information.
	 */
	for (t = info->thread; t != NULL; t = t->next)
		if (!fill_thread_core_info(t, view, cprm->siginfo->si_signo, info))
			return 0;

	/*
	 * Fill in the process-wide notes.
	 */
	fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
	info->size += notesize(&info->psinfo);

	fill_siginfo_note(&info->signote, &info->csigdata, cprm->siginfo);
	info->size += notesize(&info->signote);

	fill_auxv_note(&info->auxv, current->mm);
	info->size += notesize(&info->auxv);

	/* NT_FILE is best-effort: if it cannot be built it is omitted */
	if (fill_files_note(&info->files, cprm) == 0)
		info->size += notesize(&info->files);

	return 1;
}
1916 
1917 /*
1918  * Write all the notes for each thread.  When writing the first thread, the
1919  * process-wide notes are interleaved after the first thread-specific note.
1920  */
write_note_info(struct elf_note_info * info,struct coredump_params * cprm)1921 static int write_note_info(struct elf_note_info *info,
1922 			   struct coredump_params *cprm)
1923 {
1924 	bool first = true;
1925 	struct elf_thread_core_info *t = info->thread;
1926 
1927 	do {
1928 		int i;
1929 
1930 		if (!writenote(&t->notes[0], cprm))
1931 			return 0;
1932 
1933 		if (first && !writenote(&info->psinfo, cprm))
1934 			return 0;
1935 		if (first && !writenote(&info->signote, cprm))
1936 			return 0;
1937 		if (first && !writenote(&info->auxv, cprm))
1938 			return 0;
1939 		if (first && info->files.data &&
1940 				!writenote(&info->files, cprm))
1941 			return 0;
1942 
1943 		for (i = 1; i < info->thread_notes; ++i)
1944 			if (t->notes[i].data &&
1945 			    !writenote(&t->notes[i], cprm))
1946 				return 0;
1947 
1948 		first = false;
1949 		t = t->next;
1950 	} while (t);
1951 
1952 	return 1;
1953 }
1954 
free_note_info(struct elf_note_info * info)1955 static void free_note_info(struct elf_note_info *info)
1956 {
1957 	struct elf_thread_core_info *threads = info->thread;
1958 	while (threads) {
1959 		unsigned int i;
1960 		struct elf_thread_core_info *t = threads;
1961 		threads = t->next;
1962 		WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1963 		for (i = 1; i < info->thread_notes; ++i)
1964 			kvfree(t->notes[i].data);
1965 		kfree(t);
1966 	}
1967 	kfree(info->psinfo.data);
1968 	kvfree(info->files.data);
1969 }
1970 
/*
 * Set up ELF extended numbering: when the real segment count does not
 * fit in the 16-bit e_phnum, point the header at a single section
 * header whose sh_info carries the true count (segs).
 */
static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
			     elf_addr_t e_shoff, int segs)
{
	elf->e_shoff = e_shoff;
	elf->e_shentsize = sizeof(*shdr4extnum);
	elf->e_shnum = 1;
	elf->e_shstrndx = SHN_UNDEF;

	/*
	 * Designated initializer zeroes every unnamed field; elf_shdr has
	 * no implicit padding, so this matches the memset-based layout.
	 */
	*shdr4extnum = (struct elf_shdr) {
		.sh_type = SHT_NULL,
		.sh_size = elf->e_shnum,
		.sh_link = elf->e_shstrndx,
		.sh_info = segs,
	};
}
1986 
1987 /*
1988  * Actual dumper
1989  *
1990  * This is a two-pass process; first we find the offsets of the bits,
1991  * and then they are actually written out.  If we run out of core limit
1992  * we just truncate.
1993  */
elf_core_dump(struct coredump_params * cprm)1994 static int elf_core_dump(struct coredump_params *cprm)
1995 {
1996 	int has_dumped = 0;
1997 	int segs, i;
1998 	struct elfhdr elf;
1999 	loff_t offset = 0, dataoff;
2000 	struct elf_note_info info = { };
2001 	struct elf_phdr *phdr4note = NULL;
2002 	struct elf_shdr *shdr4extnum = NULL;
2003 	Elf_Half e_phnum;
2004 	elf_addr_t e_shoff;
2005 
2006 	/*
2007 	 * The number of segs are recored into ELF header as 16bit value.
2008 	 * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
2009 	 */
2010 	segs = cprm->vma_count + elf_core_extra_phdrs(cprm);
2011 
2012 	/* for notes section */
2013 	segs++;
2014 
2015 	/* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
2016 	 * this, kernel supports extended numbering. Have a look at
2017 	 * include/linux/elf.h for further information. */
2018 	e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2019 
2020 	/*
2021 	 * Collect all the non-memory information about the process for the
2022 	 * notes.  This also sets up the file header.
2023 	 */
2024 	if (!fill_note_info(&elf, e_phnum, &info, cprm))
2025 		goto end_coredump;
2026 
2027 	has_dumped = 1;
2028 
2029 	offset += sizeof(elf);				/* ELF header */
2030 	offset += segs * sizeof(struct elf_phdr);	/* Program headers */
2031 
2032 	/* Write notes phdr entry */
2033 	{
2034 		size_t sz = info.size;
2035 
2036 		/* For cell spufs and x86 xstate */
2037 		sz += elf_coredump_extra_notes_size();
2038 
2039 		phdr4note = kmalloc_obj(*phdr4note);
2040 		if (!phdr4note)
2041 			goto end_coredump;
2042 
2043 		fill_elf_note_phdr(phdr4note, sz, offset);
2044 		offset += sz;
2045 	}
2046 
2047 	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2048 
2049 	offset += cprm->vma_data_size;
2050 	offset += elf_core_extra_data_size(cprm);
2051 	e_shoff = offset;
2052 
2053 	if (e_phnum == PN_XNUM) {
2054 		shdr4extnum = kmalloc_obj(*shdr4extnum);
2055 		if (!shdr4extnum)
2056 			goto end_coredump;
2057 		fill_extnum_info(&elf, shdr4extnum, e_shoff, segs);
2058 	}
2059 
2060 	offset = dataoff;
2061 
2062 	if (!dump_emit(cprm, &elf, sizeof(elf)))
2063 		goto end_coredump;
2064 
2065 	if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
2066 		goto end_coredump;
2067 
2068 	/* Write program headers for segments dump */
2069 	for (i = 0; i < cprm->vma_count; i++) {
2070 		struct core_vma_metadata *meta = cprm->vma_meta + i;
2071 		struct elf_phdr phdr;
2072 
2073 		phdr.p_type = PT_LOAD;
2074 		phdr.p_offset = offset;
2075 		phdr.p_vaddr = meta->start;
2076 		phdr.p_paddr = 0;
2077 		phdr.p_filesz = meta->dump_size;
2078 		phdr.p_memsz = meta->end - meta->start;
2079 		offset += phdr.p_filesz;
2080 		phdr.p_flags = 0;
2081 		if (meta->flags & VM_READ)
2082 			phdr.p_flags |= PF_R;
2083 		if (meta->flags & VM_WRITE)
2084 			phdr.p_flags |= PF_W;
2085 		if (meta->flags & VM_EXEC)
2086 			phdr.p_flags |= PF_X;
2087 		phdr.p_align = ELF_EXEC_PAGESIZE;
2088 
2089 		if (!dump_emit(cprm, &phdr, sizeof(phdr)))
2090 			goto end_coredump;
2091 	}
2092 
2093 	if (!elf_core_write_extra_phdrs(cprm, offset))
2094 		goto end_coredump;
2095 
2096 	/* write out the notes section */
2097 	if (!write_note_info(&info, cprm))
2098 		goto end_coredump;
2099 
2100 	/* For cell spufs and x86 xstate */
2101 	if (elf_coredump_extra_notes_write(cprm))
2102 		goto end_coredump;
2103 
2104 	/* Align to page */
2105 	dump_skip_to(cprm, dataoff);
2106 
2107 	for (i = 0; i < cprm->vma_count; i++) {
2108 		struct core_vma_metadata *meta = cprm->vma_meta + i;
2109 
2110 		if (!dump_user_range(cprm, meta->start, meta->dump_size))
2111 			goto end_coredump;
2112 	}
2113 
2114 	if (!elf_core_write_extra_data(cprm))
2115 		goto end_coredump;
2116 
2117 	if (e_phnum == PN_XNUM) {
2118 		if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
2119 			goto end_coredump;
2120 	}
2121 
2122 end_coredump:
2123 	free_note_info(&info);
2124 	kfree(shdr4extnum);
2125 	kfree(phdr4note);
2126 	return has_dumped;
2127 }
2128 
2129 #endif		/* CONFIG_ELF_CORE */
2130 
/*
 * Register the ELF loader with the binfmt core.  Run as a
 * core_initcall (see below) so ELF binaries are loadable early in boot.
 */
static int __init init_elf_binfmt(void)
{
	register_binfmt(&elf_format);
	return 0;
}
2136 
/* Unregister the ELF loader when the module is removed. */
static void __exit exit_elf_binfmt(void)
{
	/* Remove the COFF and ELF loaders. */
	unregister_binfmt(&elf_format);
}
2142 
/* core_initcall: register before rootfs/userspace setup needs ELF. */
core_initcall(init_elf_binfmt);
module_exit(exit_elf_binfmt);

#ifdef CONFIG_BINFMT_ELF_KUNIT_TEST
/* Tests are included into this unit so they can reach static functions. */
#include "tests/binfmt_elf_kunit.c"
#endif
2149