xref: /linux/fs/binfmt_elf.c (revision 50157eaa0c13bb5aac5cc45330bf055d95d4af57)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * linux/fs/binfmt_elf.c
4  *
5  * These are the functions used to load ELF format executables as used
6  * on SVr4 machines.  Information on the format may be found in the book
7  * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
8  * Tools".
9  *
10  * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
11  */
12 
13 #include <linux/module.h>
14 #include <linux/kernel.h>
15 #include <linux/fs.h>
16 #include <linux/log2.h>
17 #include <linux/mm.h>
18 #include <linux/mman.h>
19 #include <linux/errno.h>
20 #include <linux/signal.h>
21 #include <linux/binfmts.h>
22 #include <linux/string.h>
23 #include <linux/file.h>
24 #include <linux/slab.h>
25 #include <linux/personality.h>
26 #include <linux/elfcore.h>
27 #include <linux/init.h>
28 #include <linux/highuid.h>
29 #include <linux/compiler.h>
30 #include <linux/highmem.h>
31 #include <linux/hugetlb.h>
32 #include <linux/pagemap.h>
33 #include <linux/vmalloc.h>
34 #include <linux/security.h>
35 #include <linux/random.h>
36 #include <linux/elf.h>
37 #include <linux/elf-randomize.h>
38 #include <linux/utsname.h>
39 #include <linux/coredump.h>
40 #include <linux/sched.h>
41 #include <linux/sched/coredump.h>
42 #include <linux/sched/task_stack.h>
43 #include <linux/sched/cputime.h>
44 #include <linux/sizes.h>
45 #include <linux/types.h>
46 #include <linux/cred.h>
47 #include <linux/dax.h>
48 #include <linux/uaccess.h>
49 #include <linux/rseq.h>
50 #include <asm/param.h>
51 #include <asm/page.h>
52 
53 #ifndef ELF_COMPAT
54 #define ELF_COMPAT 0
55 #endif
56 
57 #ifndef user_long_t
58 #define user_long_t long
59 #endif
60 #ifndef user_siginfo_t
61 #define user_siginfo_t siginfo_t
62 #endif
63 
64 /* That's for binfmt_elf_fdpic to deal with */
65 #ifndef elf_check_fdpic
66 #define elf_check_fdpic(ex) false
67 #endif
68 
69 static int load_elf_binary(struct linux_binprm *bprm);
70 
71 /*
72  * If we don't support core dumping, then supply a NULL so we
73  * don't even try.
74  */
75 #ifdef CONFIG_ELF_CORE
76 static int elf_core_dump(struct coredump_params *cprm);
77 #else
78 #define elf_core_dump	NULL
79 #endif
80 
81 #if ELF_EXEC_PAGESIZE > PAGE_SIZE
82 #define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
83 #else
84 #define ELF_MIN_ALIGN	PAGE_SIZE
85 #endif
86 
87 #ifndef ELF_CORE_EFLAGS
88 #define ELF_CORE_EFLAGS	0
89 #endif
90 
91 #define ELF_PAGESTART(_v) ((_v) & ~(int)(ELF_MIN_ALIGN-1))
92 #define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
93 #define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
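/*
 * Worked example (assuming ELF_MIN_ALIGN == 0x1000, i.e. 4 KiB pages; the
 * real value is architecture-dependent):
 *
 *   ELF_PAGESTART(0x12345)  == 0x12000  (round down to the page start)
 *   ELF_PAGEOFFSET(0x12345) == 0x345    (offset within the page)
 *   ELF_PAGEALIGN(0x12345)  == 0x13000  (round up to the next page)
 */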
94 
95 static struct linux_binfmt elf_format = {
96 	.module		= THIS_MODULE,
97 	.load_binary	= load_elf_binary,
98 #ifdef CONFIG_COREDUMP
99 	.core_dump	= elf_core_dump,
100 	.min_coredump	= ELF_EXEC_PAGESIZE,
101 #endif
102 };
103 
104 #define BAD_ADDR(x) (unlikely((unsigned long)(x) >= TASK_SIZE))
105 
106 static inline void elf_coredump_set_mm_eflags(struct mm_struct *mm, u32 flags)
107 {
108 #ifdef CONFIG_ARCH_HAS_ELF_CORE_EFLAGS
109 	mm->saved_e_flags = flags;
110 #endif
111 }
112 
113 static inline u32 elf_coredump_get_mm_eflags(struct mm_struct *mm, u32 flags)
114 {
115 #ifdef CONFIG_ARCH_HAS_ELF_CORE_EFLAGS
116 	flags = mm->saved_e_flags;
117 #endif
118 	return flags;
119 }
120 
121 /*
122  * We need to explicitly zero any trailing portion of the page that follows
123  * p_filesz when it ends before the page ends (e.g. bss), otherwise this
124  * memory will contain junk from the file that should not be present.
125  */
126 static int padzero(unsigned long address)
127 {
128 	unsigned long nbyte;
129 
130 	nbyte = ELF_PAGEOFFSET(address);
131 	if (nbyte) {
132 		nbyte = ELF_MIN_ALIGN - nbyte;
133 		if (clear_user((void __user *)address, nbyte))
134 			return -EFAULT;
135 	}
136 	return 0;
137 }
138 
139 /* Let's use some macros to make this stack manipulation a little clearer */
140 #ifdef CONFIG_STACK_GROWSUP
141 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
142 #define STACK_ROUND(sp, items) \
143 	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
144 #define STACK_ALLOC(sp, len) ({ \
145 	elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
146 	old_sp; })
147 #else
148 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
149 #define STACK_ROUND(sp, items) \
150 	(((unsigned long) (sp - items)) &~ 15UL)
151 #define STACK_ALLOC(sp, len) (sp -= len)
152 #endif
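/*
 * Illustration, for the common stack-grows-down case: STACK_ALLOC(sp, len)
 * moves sp down by len bytes and evaluates to the new (lower) address, so
 * the caller can copy len bytes there; STACK_ADD(sp, items) reserves room
 * for that many elf_addr_t slots; STACK_ROUND() keeps the final stack
 * pointer 16-byte aligned, as ABIs generally require.
 */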
153 
154 #ifndef ELF_BASE_PLATFORM
155 /*
156  * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
157  * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
158  * will be copied to the user stack in the same manner as AT_PLATFORM.
159  */
160 #define ELF_BASE_PLATFORM NULL
161 #endif
162 
163 static int
164 create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec,
165 		unsigned long interp_load_addr,
166 		unsigned long e_entry, unsigned long phdr_addr)
167 {
168 	struct mm_struct *mm = current->mm;
169 	unsigned long p = bprm->p;
170 	int argc = bprm->argc;
171 	int envc = bprm->envc;
172 	elf_addr_t __user *sp;
173 	elf_addr_t __user *u_platform;
174 	elf_addr_t __user *u_base_platform;
175 	elf_addr_t __user *u_rand_bytes;
176 	const char *k_platform = ELF_PLATFORM;
177 	const char *k_base_platform = ELF_BASE_PLATFORM;
178 	unsigned char k_rand_bytes[16];
179 	int items;
180 	elf_addr_t *elf_info;
181 	elf_addr_t flags = 0;
182 	int ei_index;
183 	const struct cred *cred = current_cred();
184 	struct vm_area_struct *vma;
185 
186 	/*
187 	 * In some cases (e.g. Hyper-Threading), we want to avoid L1
188 	 * evictions by the processes running on the same package. One
189 	 * thing we can do is to shuffle the initial stack for them.
190 	 */
191 
192 	p = arch_align_stack(p);
193 
194 	/*
195 	 * If this architecture has a platform capability string, copy it
196 	 * to userspace.  In some cases (Sparc), this info is impossible
197 	 * for userspace to get any other way, in others (i386) it is
198 	 * merely difficult.
199 	 */
200 	u_platform = NULL;
201 	if (k_platform) {
202 		size_t len = strlen(k_platform) + 1;
203 
204 		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
205 		if (copy_to_user(u_platform, k_platform, len))
206 			return -EFAULT;
207 	}
208 
209 	/*
210 	 * If this architecture has a "base" platform capability
211 	 * string, copy it to userspace.
212 	 */
213 	u_base_platform = NULL;
214 	if (k_base_platform) {
215 		size_t len = strlen(k_base_platform) + 1;
216 
217 		u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
218 		if (copy_to_user(u_base_platform, k_base_platform, len))
219 			return -EFAULT;
220 	}
221 
222 	/*
223 	 * Generate 16 random bytes for userspace PRNG seeding.
224 	 */
225 	get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
226 	u_rand_bytes = (elf_addr_t __user *)
227 		       STACK_ALLOC(p, sizeof(k_rand_bytes));
228 	if (copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
229 		return -EFAULT;
230 
231 	/* Create the ELF interpreter info */
232 	elf_info = (elf_addr_t *)mm->saved_auxv;
233 	/* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
234 #define NEW_AUX_ENT(id, val) \
235 	do { \
236 		*elf_info++ = id; \
237 		*elf_info++ = val; \
238 	} while (0)
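	/*
	 * Each NEW_AUX_ENT() appends one { id, value } pair to saved_auxv;
	 * e.g. NEW_AUX_ENT(AT_PAGESZ, 4096) would store { AT_PAGESZ, 4096 }.
	 * The vector ends with an { AT_NULL, 0 } pair (provided by the
	 * memset() further down), which is how userspace finds the end.
	 */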
239 
240 #ifdef ARCH_DLINFO
241 	/*
242 	 * ARCH_DLINFO must come first so PPC can do its special alignment of
243 	 * AUXV.
244 	 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
245 	 * ARCH_DLINFO changes
246 	 */
247 	ARCH_DLINFO;
248 #endif
249 	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
250 	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
251 	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
252 	NEW_AUX_ENT(AT_PHDR, phdr_addr);
253 	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
254 	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
255 	NEW_AUX_ENT(AT_BASE, interp_load_addr);
256 	if (bprm->interp_flags & BINPRM_FLAGS_PRESERVE_ARGV0)
257 		flags |= AT_FLAGS_PRESERVE_ARGV0;
258 	NEW_AUX_ENT(AT_FLAGS, flags);
259 	NEW_AUX_ENT(AT_ENTRY, e_entry);
260 	NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
261 	NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
262 	NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
263 	NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
264 	NEW_AUX_ENT(AT_SECURE, bprm->secureexec);
265 	NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
266 #ifdef ELF_HWCAP2
267 	NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
268 #endif
269 #ifdef ELF_HWCAP3
270 	NEW_AUX_ENT(AT_HWCAP3, ELF_HWCAP3);
271 #endif
272 #ifdef ELF_HWCAP4
273 	NEW_AUX_ENT(AT_HWCAP4, ELF_HWCAP4);
274 #endif
275 	NEW_AUX_ENT(AT_EXECFN, bprm->exec);
276 	if (k_platform) {
277 		NEW_AUX_ENT(AT_PLATFORM,
278 			    (elf_addr_t)(unsigned long)u_platform);
279 	}
280 	if (k_base_platform) {
281 		NEW_AUX_ENT(AT_BASE_PLATFORM,
282 			    (elf_addr_t)(unsigned long)u_base_platform);
283 	}
284 	if (bprm->have_execfd) {
285 		NEW_AUX_ENT(AT_EXECFD, bprm->execfd);
286 	}
287 #ifdef CONFIG_RSEQ
288 	NEW_AUX_ENT(AT_RSEQ_FEATURE_SIZE, offsetof(struct rseq, end));
289 	NEW_AUX_ENT(AT_RSEQ_ALIGN, __alignof__(struct rseq));
290 #endif
291 #undef NEW_AUX_ENT
292 	/* AT_NULL is zero; clear the rest too */
293 	memset(elf_info, 0, (char *)mm->saved_auxv +
294 			sizeof(mm->saved_auxv) - (char *)elf_info);
295 
296 	/* And advance past the AT_NULL entry.  */
297 	elf_info += 2;
298 
299 	ei_index = elf_info - (elf_addr_t *)mm->saved_auxv;
300 	sp = STACK_ADD(p, ei_index);
301 
302 	items = (argc + 1) + (envc + 1) + 1;
303 	bprm->p = STACK_ROUND(sp, items);
304 
305 	/* Point sp at the lowest address on the stack */
306 #ifdef CONFIG_STACK_GROWSUP
307 	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
308 	bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
309 #else
310 	sp = (elf_addr_t __user *)bprm->p;
311 #endif
312 
313 
314 	/*
315 	 * Grow the stack manually; some architectures have a limit on how
316 	 * far ahead a user-space access may be in order to grow the stack.
317 	 */
318 	if (mmap_write_lock_killable(mm))
319 		return -EINTR;
320 	vma = find_extend_vma_locked(mm, bprm->p);
321 	mmap_write_unlock(mm);
322 	if (!vma)
323 		return -EFAULT;
324 
325 	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
326 	if (put_user(argc, sp++))
327 		return -EFAULT;
328 
329 	/* Populate list of argv pointers back to argv strings. */
330 	p = mm->arg_end = mm->arg_start;
331 	while (argc-- > 0) {
332 		size_t len;
333 		if (put_user((elf_addr_t)p, sp++))
334 			return -EFAULT;
335 		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
336 		if (!len || len > MAX_ARG_STRLEN)
337 			return -EINVAL;
338 		p += len;
339 	}
340 	if (put_user(0, sp++))
341 		return -EFAULT;
342 	mm->arg_end = p;
343 
344 	/* Populate list of envp pointers back to envp strings. */
345 	mm->env_end = mm->env_start = p;
346 	while (envc-- > 0) {
347 		size_t len;
348 		if (put_user((elf_addr_t)p, sp++))
349 			return -EFAULT;
350 		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
351 		if (!len || len > MAX_ARG_STRLEN)
352 			return -EINVAL;
353 		p += len;
354 	}
355 	if (put_user(0, sp++))
356 		return -EFAULT;
357 	mm->env_end = p;
358 
359 	/* Put the elf_info on the stack in the right place.  */
360 	if (copy_to_user(sp, mm->saved_auxv, ei_index * sizeof(elf_addr_t)))
361 		return -EFAULT;
362 	return 0;
363 }
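/*
 * Sketch of the initial stack built above (lowest address first, for the
 * usual stack-grows-down case):
 *
 *   argc
 *   argv[0] ... argv[argc - 1], NULL
 *   envp[0] ... envp[envc - 1], NULL
 *   auxv pairs ..., { AT_NULL, 0 }
 *   (higher up: the argument/environment strings, the AT_RANDOM bytes
 *    and the platform strings copied out earlier)
 */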
364 
365 /*
366  * Map "eppnt->p_filesz" bytes from "filep" offset "eppnt->p_offset"
367  * into memory at "addr". (Note that p_filesz is rounded up to the
368  * next page, so any extra bytes from the file must be wiped.)
369  */
370 static unsigned long elf_map(struct file *filep, unsigned long addr,
371 		const struct elf_phdr *eppnt, int prot, int type,
372 		unsigned long total_size)
373 {
374 	unsigned long map_addr;
375 	unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
376 	unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
377 	addr = ELF_PAGESTART(addr);
378 	size = ELF_PAGEALIGN(size);
379 
380 	/* mmap() will return -EINVAL if given a zero size, but a
381 	 * segment with zero filesize is perfectly valid */
382 	if (!size)
383 		return addr;
384 
385 	/*
386 	 * total_size is the size of the ELF (interpreter) image.
387 	 * The _first_ mmap needs to know the full size, otherwise
388 	 * randomization might place this image so that it overlaps
389 	 * the ELF binary image (since size < total_size).
390 	 * So we first map the 'big' image and then unmap the remainder
391 	 * at the end (the unmap is needed for ELF images with holes).
392 	 */
393 	if (total_size) {
394 		total_size = ELF_PAGEALIGN(total_size);
395 		map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
396 		if (!BAD_ADDR(map_addr))
397 			vm_munmap(map_addr+size, total_size-size);
398 	} else
399 		map_addr = vm_mmap(filep, addr, size, prot, type, off);
400 
401 	if ((type & MAP_FIXED_NOREPLACE) &&
402 	    PTR_ERR((void *)map_addr) == -EEXIST)
403 		pr_info("%d (%s): Uhuuh, elf segment at %px requested but the memory is mapped already\n",
404 			task_pid_nr(current), current->comm, (void *)addr);
405 
406 	return map_addr;
407 }
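/*
 * Example with hypothetical numbers: if an interpreter's PT_LOAD segments
 * span total_size == 0x30000 but the first segment has only size == 0x10000
 * of file data, the code above mmaps the full 0x30000 at the chosen address
 * and then munmaps the trailing 0x20000, so the later MAP_FIXED mappings of
 * the remaining segments cannot land on an unrelated VMA.
 */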
408 
409 /*
410  * Map "eppnt->p_filesz" bytes from "filep" offset "eppnt->p_offset"
411  * into memory at "addr". Memory from "p_filesz" through "p_memsz"
412  * rounded up to the next page is zeroed.
413  */
414 static unsigned long elf_load(struct file *filep, unsigned long addr,
415 		const struct elf_phdr *eppnt, int prot, int type,
416 		unsigned long total_size)
417 {
418 	unsigned long zero_start, zero_end;
419 	unsigned long map_addr;
420 
421 	if (eppnt->p_filesz) {
422 		map_addr = elf_map(filep, addr, eppnt, prot, type, total_size);
423 		if (BAD_ADDR(map_addr))
424 			return map_addr;
425 		if (eppnt->p_memsz > eppnt->p_filesz) {
426 			zero_start = map_addr + ELF_PAGEOFFSET(eppnt->p_vaddr) +
427 				eppnt->p_filesz;
428 			zero_end = map_addr + ELF_PAGEOFFSET(eppnt->p_vaddr) +
429 				eppnt->p_memsz;
430 
431 			/*
432 			 * Zero the end of the last mapped page but ignore
433 			 * any errors if the segment isn't writable.
434 			 */
435 			if (padzero(zero_start) && (prot & PROT_WRITE))
436 				return -EFAULT;
437 		}
438 	} else {
439 		map_addr = zero_start = ELF_PAGESTART(addr);
440 		zero_end = zero_start + ELF_PAGEOFFSET(eppnt->p_vaddr) +
441 			eppnt->p_memsz;
442 	}
443 	if (eppnt->p_memsz > eppnt->p_filesz) {
444 		/*
445 		 * Map the last of the segment.
446 		 * If the header is requesting these pages to be
447 		 * executable, honour that (ppc32 needs this).
448 		 */
449 		int error;
450 
451 		zero_start = ELF_PAGEALIGN(zero_start);
452 		zero_end = ELF_PAGEALIGN(zero_end);
453 
454 		error = vm_brk_flags(zero_start, zero_end - zero_start,
455 				     prot & PROT_EXEC ? VM_EXEC : 0);
456 		if (error)
457 			map_addr = error;
458 	}
459 	return map_addr;
460 }
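/*
 * Example of the bss case with hypothetical values: a PT_LOAD with
 * p_filesz == 0x1234 and p_memsz == 0x5000 gets its file contents mapped,
 * the tail of the last file-backed page is zeroed via padzero(), and the
 * remaining page-aligned [zero_start, zero_end) range is supplied as
 * anonymous zero pages by vm_brk_flags().
 */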
461 
462 
463 static unsigned long total_mapping_size(const struct elf_phdr *phdr, int nr)
464 {
465 	elf_addr_t min_addr = -1;
466 	elf_addr_t max_addr = 0;
467 	bool pt_load = false;
468 	int i;
469 
470 	for (i = 0; i < nr; i++) {
471 		if (phdr[i].p_type == PT_LOAD) {
472 			min_addr = min(min_addr, ELF_PAGESTART(phdr[i].p_vaddr));
473 			max_addr = max(max_addr, phdr[i].p_vaddr + phdr[i].p_memsz);
474 			pt_load = true;
475 		}
476 	}
477 	return pt_load ? (max_addr - min_addr) : 0;
478 }
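/*
 * E.g. (hypothetical) two PT_LOADs at p_vaddr 0x0 and 0x200000 with
 * p_memsz 0x1000 and 0x3000 yield 0x203000: the span from the page start
 * of the lowest segment to the end of the highest one.
 */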
479 
480 static int elf_read(struct file *file, void *buf, size_t len, loff_t pos)
481 {
482 	ssize_t rv;
483 
484 	rv = kernel_read(file, buf, len, &pos);
485 	if (unlikely(rv != len)) {
486 		return (rv < 0) ? rv : -EIO;
487 	}
488 	return 0;
489 }
490 
491 static unsigned long maximum_alignment(struct elf_phdr *cmds, int nr)
492 {
493 	unsigned long alignment = 0;
494 	int i;
495 
496 	for (i = 0; i < nr; i++) {
497 		if (cmds[i].p_type == PT_LOAD) {
498 			unsigned long p_align = cmds[i].p_align;
499 
500 			/* skip non-power of two alignments as invalid */
501 			if (!is_power_of_2(p_align))
502 				continue;
503 			alignment = max(alignment, p_align);
504 		}
505 	}
506 
507 	/* ensure we align to at least one page */
508 	return ELF_PAGEALIGN(alignment);
509 }
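/*
 * E.g. (hypothetical) PT_LOADs requesting p_align of 0x1000 and 0x200000
 * yield 0x200000 here; a bogus non-power-of-two p_align is skipped rather
 * than failing the exec, and the result is never below one page.
 */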
510 
511 /**
512  * load_elf_phdrs() - load ELF program headers
513  * @elf_ex:   ELF header of the binary whose program headers should be loaded
514  * @elf_file: the opened ELF binary file
515  *
516  * Loads ELF program headers from the binary file elf_file, which has the ELF
517  * header pointed to by elf_ex, into a newly allocated array. The caller is
518  * responsible for freeing the allocated data. Returns NULL upon failure.
519  */
520 static struct elf_phdr *load_elf_phdrs(const struct elfhdr *elf_ex,
521 				       struct file *elf_file)
522 {
523 	struct elf_phdr *elf_phdata = NULL;
524 	int retval = -1;
525 	unsigned int size;
526 
527 	/*
528 	 * If the size of this structure has changed, then punt, since
529 	 * we will be doing the wrong thing.
530 	 */
531 	if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
532 		goto out;
533 
534 	/* Sanity check the number of program headers... */
535 	/* ...and their total size. */
536 	size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
537 	if (size == 0 || size > 65536)
538 		goto out;
539 
540 	elf_phdata = kmalloc(size, GFP_KERNEL);
541 	if (!elf_phdata)
542 		goto out;
543 
544 	/* Read in the program headers */
545 	retval = elf_read(elf_file, elf_phdata, size, elf_ex->e_phoff);
546 
547 out:
548 	if (retval) {
549 		kfree(elf_phdata);
550 		elf_phdata = NULL;
551 	}
552 	return elf_phdata;
553 }
554 
555 #ifndef CONFIG_ARCH_BINFMT_ELF_STATE
556 
557 /**
558  * struct arch_elf_state - arch-specific ELF loading state
559  *
560  * This structure is used to preserve architecture specific data during
561  * the loading of an ELF file, throughout the checking of architecture
562  * specific ELF headers & through to the point where the ELF load is
563  * known to be proceeding (ie. SET_PERSONALITY).
564  *
565  * This implementation is a dummy for architectures which require no
566  * specific state.
567  */
568 struct arch_elf_state {
569 };
570 
571 #define INIT_ARCH_ELF_STATE {}
572 
573 /**
574  * arch_elf_pt_proc() - check a PT_LOPROC..PT_HIPROC ELF program header
575  * @ehdr:	The main ELF header
576  * @phdr:	The program header to check
577  * @elf:	The open ELF file
578  * @is_interp:	True if the phdr is from the interpreter of the ELF being
579  *		loaded, else false.
580  * @state:	Architecture-specific state preserved throughout the process
581  *		of loading the ELF.
582  *
583  * Inspects the program header phdr to validate its correctness and/or
584  * suitability for the system. Called once per ELF program header in the
585  * range PT_LOPROC to PT_HIPROC, for both the ELF being loaded and its
586  * interpreter.
587  *
588  * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
589  *         with that return code.
590  */
591 static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
592 				   struct elf_phdr *phdr,
593 				   struct file *elf, bool is_interp,
594 				   struct arch_elf_state *state)
595 {
596 	/* Dummy implementation, always proceed */
597 	return 0;
598 }
599 
600 /**
601  * arch_check_elf() - check an ELF executable
602  * @ehdr:	The main ELF header
603  * @has_interp:	True if the ELF has an interpreter, else false.
604  * @interp_ehdr: The interpreter's ELF header
605  * @state:	Architecture-specific state preserved throughout the process
606  *		of loading the ELF.
607  *
608  * Provides a final opportunity for architecture code to reject the loading
609  * of the ELF & cause an exec syscall to return an error. This is called after
610  * all program headers to be checked by arch_elf_pt_proc have been.
611  *
612  * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
613  *         with that return code.
614  */
615 static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
616 				 struct elfhdr *interp_ehdr,
617 				 struct arch_elf_state *state)
618 {
619 	/* Dummy implementation, always proceed */
620 	return 0;
621 }
622 
623 #endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */
624 
625 static inline int make_prot(u32 p_flags, struct arch_elf_state *arch_state,
626 			    bool has_interp, bool is_interp)
627 {
628 	int prot = 0;
629 
630 	if (p_flags & PF_R)
631 		prot |= PROT_READ;
632 	if (p_flags & PF_W)
633 		prot |= PROT_WRITE;
634 	if (p_flags & PF_X)
635 		prot |= PROT_EXEC;
636 
637 	return arch_elf_adjust_prot(prot, arch_state, has_interp, is_interp);
638 }
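/*
 * E.g. a typical text segment with p_flags == (PF_R | PF_X) becomes
 * PROT_READ | PROT_EXEC, possibly adjusted further by the architecture
 * hook (used e.g. for branch-protection style page flags).
 */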
639 
640 /* This is much more generalized than the library routine read function,
641    so we keep this separate.  Technically the library read function
642    is only provided so that we can read a.out libraries that have
643    an ELF header */
644 
645 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
646 		struct file *interpreter,
647 		unsigned long no_base, struct elf_phdr *interp_elf_phdata,
648 		struct arch_elf_state *arch_state)
649 {
650 	struct elf_phdr *eppnt;
651 	unsigned long load_addr = 0;
652 	int load_addr_set = 0;
653 	unsigned long error = ~0UL;
654 	unsigned long total_size;
655 	int i;
656 
657 	/* First of all, some simple consistency checks */
658 	if (interp_elf_ex->e_type != ET_EXEC &&
659 	    interp_elf_ex->e_type != ET_DYN)
660 		goto out;
661 	if (!elf_check_arch(interp_elf_ex) ||
662 	    elf_check_fdpic(interp_elf_ex))
663 		goto out;
664 	if (!can_mmap_file(interpreter))
665 		goto out;
666 
667 	total_size = total_mapping_size(interp_elf_phdata,
668 					interp_elf_ex->e_phnum);
669 	if (!total_size) {
670 		error = -EINVAL;
671 		goto out;
672 	}
673 
674 	eppnt = interp_elf_phdata;
675 	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
676 		if (eppnt->p_type == PT_LOAD) {
677 			int elf_type = MAP_PRIVATE;
678 			int elf_prot = make_prot(eppnt->p_flags, arch_state,
679 						 true, true);
680 			unsigned long vaddr = 0;
681 			unsigned long k, map_addr;
682 
683 			vaddr = eppnt->p_vaddr;
684 			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
685 				elf_type |= MAP_FIXED;
686 			else if (no_base && interp_elf_ex->e_type == ET_DYN)
687 				load_addr = -vaddr;
688 
689 			map_addr = elf_load(interpreter, load_addr + vaddr,
690 					eppnt, elf_prot, elf_type, total_size);
691 			total_size = 0;
692 			error = map_addr;
693 			if (BAD_ADDR(map_addr))
694 				goto out;
695 
696 			if (!load_addr_set &&
697 			    interp_elf_ex->e_type == ET_DYN) {
698 				load_addr = map_addr - ELF_PAGESTART(vaddr);
699 				load_addr_set = 1;
700 			}
701 
702 			/*
703 			 * Check to see if the section's size will overflow the
704 			 * allowed task size. Note that p_filesz must always be
705 	 * <= p_memsz so it's only necessary to check p_memsz.
706 			 */
707 			k = load_addr + eppnt->p_vaddr;
708 			if (BAD_ADDR(k) ||
709 			    eppnt->p_filesz > eppnt->p_memsz ||
710 			    eppnt->p_memsz > TASK_SIZE ||
711 			    TASK_SIZE - eppnt->p_memsz < k) {
712 				error = -ENOMEM;
713 				goto out;
714 			}
715 		}
716 	}
717 
718 	error = load_addr;
719 out:
720 	return error;
721 }
722 
723 /*
724  * These are the functions used to load ELF style executables and shared
725  * libraries.  There is no binary dependent code anywhere else.
726  */
727 
728 static int parse_elf_property(const char *data, size_t *off, size_t datasz,
729 			      struct arch_elf_state *arch,
730 			      bool have_prev_type, u32 *prev_type)
731 {
732 	size_t o, step;
733 	const struct gnu_property *pr;
734 	int ret;
735 
736 	if (*off == datasz)
737 		return -ENOENT;
738 
739 	if (WARN_ON_ONCE(*off > datasz || *off % ELF_GNU_PROPERTY_ALIGN))
740 		return -EIO;
741 	o = *off;
742 	datasz -= *off;
743 
744 	if (datasz < sizeof(*pr))
745 		return -ENOEXEC;
746 	pr = (const struct gnu_property *)(data + o);
747 	o += sizeof(*pr);
748 	datasz -= sizeof(*pr);
749 
750 	if (pr->pr_datasz > datasz)
751 		return -ENOEXEC;
752 
753 	WARN_ON_ONCE(o % ELF_GNU_PROPERTY_ALIGN);
754 	step = round_up(pr->pr_datasz, ELF_GNU_PROPERTY_ALIGN);
755 	if (step > datasz)
756 		return -ENOEXEC;
757 
758 	/* Properties are supposed to be unique and sorted on pr_type: */
759 	if (have_prev_type && pr->pr_type <= *prev_type)
760 		return -ENOEXEC;
761 	*prev_type = pr->pr_type;
762 
763 	ret = arch_parse_elf_property(pr->pr_type, data + o,
764 				      pr->pr_datasz, ELF_COMPAT, arch);
765 	if (ret)
766 		return ret;
767 
768 	*off = o + step;
769 	return 0;
770 }
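/*
 * The property data walked above is a sequence of records, each a
 * { u32 pr_type; u32 pr_datasz; } header followed by pr_datasz bytes of
 * data padded to ELF_GNU_PROPERTY_ALIGN, sorted by pr_type without
 * duplicates. parse_elf_property() consumes one record per call and
 * reports -ENOENT once *off reaches the end of the data.
 */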
771 
772 #define NOTE_DATA_SZ SZ_1K
773 #define NOTE_NAME_SZ (sizeof(NN_GNU_PROPERTY_TYPE_0))
774 
775 static int parse_elf_properties(struct file *f, const struct elf_phdr *phdr,
776 				struct arch_elf_state *arch)
777 {
778 	union {
779 		struct elf_note nhdr;
780 		char data[NOTE_DATA_SZ];
781 	} note;
782 	loff_t pos;
783 	ssize_t n;
784 	size_t off, datasz;
785 	int ret;
786 	bool have_prev_type;
787 	u32 prev_type;
788 
789 	if (!IS_ENABLED(CONFIG_ARCH_USE_GNU_PROPERTY) || !phdr)
790 		return 0;
791 
792 	/* load_elf_binary() shouldn't call us unless this is true... */
793 	if (WARN_ON_ONCE(phdr->p_type != PT_GNU_PROPERTY))
794 		return -ENOEXEC;
795 
796 	/* If the properties are crazy large, that's too bad (for now): */
797 	if (phdr->p_filesz > sizeof(note))
798 		return -ENOEXEC;
799 
800 	pos = phdr->p_offset;
801 	n = kernel_read(f, &note, phdr->p_filesz, &pos);
802 
803 	BUILD_BUG_ON(sizeof(note) < sizeof(note.nhdr) + NOTE_NAME_SZ);
804 	if (n < 0 || n < sizeof(note.nhdr) + NOTE_NAME_SZ)
805 		return -EIO;
806 
807 	if (note.nhdr.n_type != NT_GNU_PROPERTY_TYPE_0 ||
808 	    note.nhdr.n_namesz != NOTE_NAME_SZ ||
809 	    strncmp(note.data + sizeof(note.nhdr),
810 		    NN_GNU_PROPERTY_TYPE_0, n - sizeof(note.nhdr)))
811 		return -ENOEXEC;
812 
813 	off = round_up(sizeof(note.nhdr) + NOTE_NAME_SZ,
814 		       ELF_GNU_PROPERTY_ALIGN);
815 	if (off > n)
816 		return -ENOEXEC;
817 
818 	if (note.nhdr.n_descsz > n - off)
819 		return -ENOEXEC;
820 	datasz = off + note.nhdr.n_descsz;
821 
822 	have_prev_type = false;
823 	do {
824 		ret = parse_elf_property(note.data, &off, datasz, arch,
825 					 have_prev_type, &prev_type);
826 		have_prev_type = true;
827 	} while (!ret);
828 
829 	return ret == -ENOENT ? 0 : ret;
830 }
831 
832 static int load_elf_binary(struct linux_binprm *bprm)
833 {
834 	struct file *interpreter = NULL; /* to shut gcc up */
835 	unsigned long load_bias = 0, phdr_addr = 0;
836 	int first_pt_load = 1;
837 	unsigned long error;
838 	struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
839 	struct elf_phdr *elf_property_phdata = NULL;
840 	unsigned long elf_brk;
841 	bool brk_moved = false;
842 	int retval, i;
843 	unsigned long elf_entry;
844 	unsigned long e_entry;
845 	unsigned long interp_load_addr = 0;
846 	unsigned long start_code, end_code, start_data, end_data;
847 	unsigned long reloc_func_desc __maybe_unused = 0;
848 	int executable_stack = EXSTACK_DEFAULT;
849 	struct elfhdr *elf_ex = (struct elfhdr *)bprm->buf;
850 	struct elfhdr *interp_elf_ex = NULL;
851 	struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
852 	struct mm_struct *mm;
853 	struct pt_regs *regs;
854 
855 	retval = -ENOEXEC;
856 	/* First of all, some simple consistency checks */
857 	if (memcmp(elf_ex->e_ident, ELFMAG, SELFMAG) != 0)
858 		goto out;
859 
860 	if (elf_ex->e_type != ET_EXEC && elf_ex->e_type != ET_DYN)
861 		goto out;
862 	if (!elf_check_arch(elf_ex))
863 		goto out;
864 	if (elf_check_fdpic(elf_ex))
865 		goto out;
866 	if (!can_mmap_file(bprm->file))
867 		goto out;
868 
869 	elf_phdata = load_elf_phdrs(elf_ex, bprm->file);
870 	if (!elf_phdata)
871 		goto out;
872 
873 	elf_ppnt = elf_phdata;
874 	for (i = 0; i < elf_ex->e_phnum; i++, elf_ppnt++) {
875 		char *elf_interpreter;
876 
877 		if (elf_ppnt->p_type == PT_GNU_PROPERTY) {
878 			elf_property_phdata = elf_ppnt;
879 			continue;
880 		}
881 
882 		if (elf_ppnt->p_type != PT_INTERP)
883 			continue;
884 
885 		/*
886 		 * This is the program interpreter used for shared libraries
887 		 * and dynamic executables; read its pathname from the segment.
888 		 */
889 		retval = -ENOEXEC;
890 		if (elf_ppnt->p_filesz > PATH_MAX || elf_ppnt->p_filesz < 2)
891 			goto out_free_ph;
892 
893 		retval = -ENOMEM;
894 		elf_interpreter = kmalloc(elf_ppnt->p_filesz, GFP_KERNEL);
895 		if (!elf_interpreter)
896 			goto out_free_ph;
897 
898 		retval = elf_read(bprm->file, elf_interpreter, elf_ppnt->p_filesz,
899 				  elf_ppnt->p_offset);
900 		if (retval < 0)
901 			goto out_free_interp;
902 		/* make sure path is NULL terminated */
903 		retval = -ENOEXEC;
904 		if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
905 			goto out_free_interp;
906 
907 		interpreter = open_exec(elf_interpreter);
908 		kfree(elf_interpreter);
909 		retval = PTR_ERR(interpreter);
910 		if (IS_ERR(interpreter))
911 			goto out_free_ph;
912 
913 		/*
914 		 * If the binary is not readable then enforce mm->dumpable = 0
915 		 * regardless of the interpreter's permissions.
916 		 */
917 		would_dump(bprm, interpreter);
918 
919 		interp_elf_ex = kmalloc(sizeof(*interp_elf_ex), GFP_KERNEL);
920 		if (!interp_elf_ex) {
921 			retval = -ENOMEM;
922 			goto out_free_file;
923 		}
924 
925 		/* Get the exec headers */
926 		retval = elf_read(interpreter, interp_elf_ex,
927 				  sizeof(*interp_elf_ex), 0);
928 		if (retval < 0)
929 			goto out_free_dentry;
930 
931 		break;
932 
933 out_free_interp:
934 		kfree(elf_interpreter);
935 		goto out_free_ph;
936 	}
937 
938 	elf_ppnt = elf_phdata;
939 	for (i = 0; i < elf_ex->e_phnum; i++, elf_ppnt++)
940 		switch (elf_ppnt->p_type) {
941 		case PT_GNU_STACK:
942 			if (elf_ppnt->p_flags & PF_X)
943 				executable_stack = EXSTACK_ENABLE_X;
944 			else
945 				executable_stack = EXSTACK_DISABLE_X;
946 			break;
947 
948 		case PT_LOPROC ... PT_HIPROC:
949 			retval = arch_elf_pt_proc(elf_ex, elf_ppnt,
950 						  bprm->file, false,
951 						  &arch_state);
952 			if (retval)
953 				goto out_free_dentry;
954 			break;
955 		}
956 
957 	/* Some simple consistency checks for the interpreter */
958 	if (interpreter) {
959 		retval = -ELIBBAD;
960 		/* Not an ELF interpreter */
961 		if (memcmp(interp_elf_ex->e_ident, ELFMAG, SELFMAG) != 0)
962 			goto out_free_dentry;
963 		/* Verify the interpreter has a valid arch */
964 		if (!elf_check_arch(interp_elf_ex) ||
965 		    elf_check_fdpic(interp_elf_ex))
966 			goto out_free_dentry;
967 
968 		/* Load the interpreter program headers */
969 		interp_elf_phdata = load_elf_phdrs(interp_elf_ex,
970 						   interpreter);
971 		if (!interp_elf_phdata)
972 			goto out_free_dentry;
973 
974 		/* Pass PT_LOPROC..PT_HIPROC headers to arch code */
975 		elf_property_phdata = NULL;
976 		elf_ppnt = interp_elf_phdata;
977 		for (i = 0; i < interp_elf_ex->e_phnum; i++, elf_ppnt++)
978 			switch (elf_ppnt->p_type) {
979 			case PT_GNU_PROPERTY:
980 				elf_property_phdata = elf_ppnt;
981 				break;
982 
983 			case PT_LOPROC ... PT_HIPROC:
984 				retval = arch_elf_pt_proc(interp_elf_ex,
985 							  elf_ppnt, interpreter,
986 							  true, &arch_state);
987 				if (retval)
988 					goto out_free_dentry;
989 				break;
990 			}
991 	}
992 
993 	retval = parse_elf_properties(interpreter ?: bprm->file,
994 				      elf_property_phdata, &arch_state);
995 	if (retval)
996 		goto out_free_dentry;
997 
998 	/*
999 	 * Allow arch code to reject the ELF at this point, whilst it's
1000 	 * still possible to return an error to the code that invoked
1001 	 * the exec syscall.
1002 	 */
1003 	retval = arch_check_elf(elf_ex,
1004 				!!interpreter, interp_elf_ex,
1005 				&arch_state);
1006 	if (retval)
1007 		goto out_free_dentry;
1008 
1009 	/* Flush all traces of the currently running executable */
1010 	retval = begin_new_exec(bprm);
1011 	if (retval)
1012 		goto out_free_dentry;
1013 
1014 	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
1015 	   may depend on the personality.  */
1016 	SET_PERSONALITY2(*elf_ex, &arch_state);
1017 	if (elf_read_implies_exec(*elf_ex, executable_stack))
1018 		current->personality |= READ_IMPLIES_EXEC;
1019 
1020 	const int snapshot_randomize_va_space = READ_ONCE(randomize_va_space);
1021 	if (!(current->personality & ADDR_NO_RANDOMIZE) && snapshot_randomize_va_space)
1022 		current->flags |= PF_RANDOMIZE;
1023 
1024 	setup_new_exec(bprm);
1025 
1026 	/* Do this so that we can load the interpreter, if need be.  We will
1027 	   change some of these later */
1028 	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
1029 				 executable_stack);
1030 	if (retval < 0)
1031 		goto out_free_dentry;
1032 
1033 	elf_brk = 0;
1034 
1035 	start_code = ~0UL;
1036 	end_code = 0;
1037 	start_data = 0;
1038 	end_data = 0;
1039 
1040 	/* Now we do a little grungy work by mmapping the ELF image into
1041 	   the correct location in memory. */
1042 	for (i = 0, elf_ppnt = elf_phdata;
1043 	    i < elf_ex->e_phnum; i++, elf_ppnt++) {
1044 		int elf_prot, elf_flags;
1045 		unsigned long k, vaddr;
1046 		unsigned long total_size = 0;
1047 		unsigned long alignment;
1048 
1049 		if (elf_ppnt->p_type != PT_LOAD)
1050 			continue;
1051 
1052 		elf_prot = make_prot(elf_ppnt->p_flags, &arch_state,
1053 				     !!interpreter, false);
1054 
1055 		elf_flags = MAP_PRIVATE;
1056 
1057 		vaddr = elf_ppnt->p_vaddr;
1058 		/*
1059 		 * The first time through the loop, first_pt_load is true:
1060 		 * layout will be calculated. Once set, use MAP_FIXED since
1061 		 * we know we've already safely mapped the entire region with
1062 		 * MAP_FIXED_NOREPLACE in the once-per-binary logic following.
1063 		 */
1064 		if (!first_pt_load) {
1065 			elf_flags |= MAP_FIXED;
1066 		} else if (elf_ex->e_type == ET_EXEC) {
1067 			/*
1068 			 * This logic is run once for the first LOAD Program
1069 			 * Header for ET_EXEC binaries. No special handling
1070 			 * is needed.
1071 			 */
1072 			elf_flags |= MAP_FIXED_NOREPLACE;
1073 		} else if (elf_ex->e_type == ET_DYN) {
1074 			/*
1075 			 * This logic is run once for the first LOAD Program
1076 			 * Header for ET_DYN binaries to calculate the
1077 			 * randomization (load_bias) for all the LOAD
1078 			 * Program Headers.
1079 			 */
1080 
1081 			/*
1082 			 * Calculate the entire size of the ELF mapping
1083 			 * (total_size), used for the initial mapping,
1084 			 * because first_pt_load is cleared once the
1085 			 * initial mapping is performed.
1086 			 *
1087 			 * Note that this is only sensible when the LOAD
1088 			 * segments are contiguous (or overlapping). If
1089 			 * used for LOADs that are far apart, this would
1090 			 * cause the holes between LOADs to be mapped,
1091 			 * running the risk of having the mapping fail,
1092 			 * as it would be larger than the ELF file itself.
1093 			 *
1094 			 * As a result, only ET_DYN does this, since
1095 			 * some ET_EXEC (e.g. ia64) may have large virtual
1096 			 * memory holes between LOADs.
1097 			 *
1098 			 */
1099 			total_size = total_mapping_size(elf_phdata,
1100 							elf_ex->e_phnum);
1101 			if (!total_size) {
1102 				retval = -EINVAL;
1103 				goto out_free_dentry;
1104 			}
1105 
1106 			/* Calculate any requested alignment. */
1107 			alignment = maximum_alignment(elf_phdata, elf_ex->e_phnum);
1108 
1109 			/**
1110 			 * DOC: PIE handling
1111 			 *
1112 			 * There are effectively two types of ET_DYN ELF
1113 			 * binaries: programs (i.e. PIE: ET_DYN with
1114 			 * PT_INTERP) and loaders (i.e. static PIE: ET_DYN
1115 			 * without PT_INTERP, usually the ELF interpreter
1116 			 * itself). Loaders must be loaded away from programs
1117 			 * since the program may otherwise collide with the
1118 			 * loader (especially for ET_EXEC which does not have
1119 			 * a randomized position).
1120 			 *
1121 			 * For example, to handle invocations of
1122 			 * "./ld.so someprog" to test out a new version of
1123 			 * the loader, the subsequent program that the
1124 			 * loader loads must avoid the loader itself, so
1125 			 * they cannot share the same load range. Sufficient
1126 			 * room for the brk must be allocated with the
1127 			 * loader as well, since brk must be available with
1128 			 * the loader.
1129 			 *
1130 			 * Therefore, programs are loaded offset from
1131 			 * ELF_ET_DYN_BASE and loaders are loaded into the
1132 			 * independently randomized mmap region (0 load_bias
1133 			 * without MAP_FIXED nor MAP_FIXED_NOREPLACE).
1134 			 *
1135 			 * See below for "brk" handling details, which is
1136 			 * also affected by program vs loader and ASLR.
1137 			 */
1138 			if (interpreter) {
1139 				/* On ET_DYN with PT_INTERP, we do the ASLR. */
1140 				load_bias = ELF_ET_DYN_BASE;
1141 				if (current->flags & PF_RANDOMIZE)
1142 					load_bias += arch_mmap_rnd();
1143 				/* Adjust alignment as requested. */
1144 				if (alignment)
1145 					load_bias &= ~(alignment - 1);
1146 				elf_flags |= MAP_FIXED_NOREPLACE;
1147 			} else {
1148 				/*
1149 				 * For ET_DYN without PT_INTERP, we rely on
1150 				 * the architecture's (potentially ASLR) mmap
1151 				 * base address (via a load_bias of 0).
1152 				 *
1153 				 * When a large alignment is requested, we
1154 				 * must do the allocation at address "0" right
1155 				 * now to discover where things will load so
1156 				 * that we can adjust the resulting alignment.
1157 				 * In this case (load_bias != 0), we can use
1158 				 * MAP_FIXED_NOREPLACE to make sure the mapping
1159 				 * doesn't collide with anything.
1160 				 */
1161 				if (alignment > ELF_MIN_ALIGN) {
1162 					load_bias = elf_load(bprm->file, 0, elf_ppnt,
1163 							     elf_prot, elf_flags, total_size);
1164 					if (BAD_ADDR(load_bias)) {
1165 						retval = IS_ERR_VALUE(load_bias) ?
1166 							 PTR_ERR((void*)load_bias) : -EINVAL;
1167 						goto out_free_dentry;
1168 					}
1169 					vm_munmap(load_bias, total_size);
1170 					/* Adjust alignment as requested. */
1171 					if (alignment)
1172 						load_bias &= ~(alignment - 1);
1173 					elf_flags |= MAP_FIXED_NOREPLACE;
1174 				} else
1175 					load_bias = 0;
1176 			}
1177 
1178 			/*
1179 			 * Since load_bias is used for all subsequent loading
1180 			 * calculations, we must lower it by the first vaddr
1181 			 * so that the remaining calculations based on the
1182 			 * ELF vaddrs will be correctly offset. The result
1183 			 * is then page aligned.
1184 			 */
1185 			load_bias = ELF_PAGESTART(load_bias - vaddr);
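			/*
			 * Worked example with hypothetical numbers: a
			 * randomized load_bias of 0x555555000000 and a first
			 * PT_LOAD p_vaddr of 0x1000 give 0x555554fff000 here,
			 * so that load_bias + p_vaddr lands back on the
			 * chosen 0x555555000000 address.
			 */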
1186 		}
1187 
1188 		error = elf_load(bprm->file, load_bias + vaddr, elf_ppnt,
1189 				elf_prot, elf_flags, total_size);
1190 		if (BAD_ADDR(error)) {
1191 			retval = IS_ERR_VALUE(error) ?
1192 				PTR_ERR((void*)error) : -EINVAL;
1193 			goto out_free_dentry;
1194 		}
1195 
1196 		if (first_pt_load) {
1197 			first_pt_load = 0;
1198 			if (elf_ex->e_type == ET_DYN) {
1199 				load_bias += error -
1200 				             ELF_PAGESTART(load_bias + vaddr);
1201 				reloc_func_desc = load_bias;
1202 			}
1203 		}
1204 
1205 		/*
1206 		 * Figure out which segment in the file contains the Program
1207 		 * Header table, and map to the associated memory address.
1208 		 */
1209 		if (elf_ppnt->p_offset <= elf_ex->e_phoff &&
1210 		    elf_ex->e_phoff < elf_ppnt->p_offset + elf_ppnt->p_filesz) {
1211 			phdr_addr = elf_ex->e_phoff - elf_ppnt->p_offset +
1212 				    elf_ppnt->p_vaddr;
1213 		}
1214 
1215 		k = elf_ppnt->p_vaddr;
1216 		if ((elf_ppnt->p_flags & PF_X) && k < start_code)
1217 			start_code = k;
1218 		if (start_data < k)
1219 			start_data = k;
1220 
1221 		/*
1222 		 * Check to see if the section's size will overflow the
1223 		 * allowed task size. Note that p_filesz must always be
1224 		 * <= p_memsz so it is only necessary to check p_memsz.
1225 		 */
1226 		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
1227 		    elf_ppnt->p_memsz > TASK_SIZE ||
1228 		    TASK_SIZE - elf_ppnt->p_memsz < k) {
1229 			/* set_brk can never work. Avoid overflows. */
1230 			retval = -EINVAL;
1231 			goto out_free_dentry;
1232 		}
1233 
1234 		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
1235 
1236 		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
1237 			end_code = k;
1238 		if (end_data < k)
1239 			end_data = k;
1240 		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
1241 		if (k > elf_brk)
1242 			elf_brk = k;
1243 	}
1244 
1245 	e_entry = elf_ex->e_entry + load_bias;
1246 	phdr_addr += load_bias;
1247 	elf_brk += load_bias;
1248 	start_code += load_bias;
1249 	end_code += load_bias;
1250 	start_data += load_bias;
1251 	end_data += load_bias;
1252 
1253 	if (interpreter) {
1254 		elf_entry = load_elf_interp(interp_elf_ex,
1255 					    interpreter,
1256 					    load_bias, interp_elf_phdata,
1257 					    &arch_state);
1258 		if (!IS_ERR_VALUE(elf_entry)) {
1259 			/*
1260 			 * load_elf_interp() returns relocation
1261 			 * adjustment
1262 			 */
1263 			interp_load_addr = elf_entry;
1264 			elf_entry += interp_elf_ex->e_entry;
1265 		}
1266 		if (BAD_ADDR(elf_entry)) {
1267 			retval = IS_ERR_VALUE(elf_entry) ?
1268 					(int)elf_entry : -EINVAL;
1269 			goto out_free_dentry;
1270 		}
1271 		reloc_func_desc = interp_load_addr;
1272 
1273 		exe_file_allow_write_access(interpreter);
1274 		fput(interpreter);
1275 
1276 		kfree(interp_elf_ex);
1277 		kfree(interp_elf_phdata);
1278 	} else {
1279 		elf_entry = e_entry;
1280 		if (BAD_ADDR(elf_entry)) {
1281 			retval = -EINVAL;
1282 			goto out_free_dentry;
1283 		}
1284 	}
1285 
1286 	kfree(elf_phdata);
1287 
1288 	set_binfmt(&elf_format);
1289 
1290 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
1291 	retval = ARCH_SETUP_ADDITIONAL_PAGES(bprm, elf_ex, !!interpreter);
1292 	if (retval < 0)
1293 		goto out;
1294 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
1295 
1296 	retval = create_elf_tables(bprm, elf_ex, interp_load_addr,
1297 				   e_entry, phdr_addr);
1298 	if (retval < 0)
1299 		goto out;
1300 
1301 	mm = current->mm;
1302 	mm->end_code = end_code;
1303 	mm->start_code = start_code;
1304 	mm->start_data = start_data;
1305 	mm->end_data = end_data;
1306 	mm->start_stack = bprm->p;
1307 
1308 	elf_coredump_set_mm_eflags(mm, elf_ex->e_flags);
1309 
1310 	/**
1311 	 * DOC: "brk" handling
1312 	 *
1313 	 * For architectures with ELF randomization, when executing a
1314 	 * loader directly (i.e. static PIE: ET_DYN without PT_INTERP),
1315 	 * move the brk area out of the mmap region and into the unused
1316 	 * ELF_ET_DYN_BASE region. Since "brk" grows up it may collide
1317 	 * early with the stack growing down or other regions being put
1318 	 * into the mmap region by the kernel (e.g. vdso).
1319 	 *
1320 	 * In the CONFIG_COMPAT_BRK case, though, everything is turned
1321 	 * off because we're not allowed to move the brk at all.
1322 	 */
1323 	if (!IS_ENABLED(CONFIG_COMPAT_BRK) &&
1324 	    IS_ENABLED(CONFIG_ARCH_HAS_ELF_RANDOMIZE) &&
1325 	    elf_ex->e_type == ET_DYN && !interpreter) {
1326 		elf_brk = ELF_ET_DYN_BASE;
1327 		/* This counts as moving the brk, so let brk(2) know. */
1328 		brk_moved = true;
1329 	}
1330 	mm->start_brk = mm->brk = ELF_PAGEALIGN(elf_brk);
1331 
1332 	if ((current->flags & PF_RANDOMIZE) && snapshot_randomize_va_space > 1) {
1333 		/*
1334 		 * If we didn't move the brk to ELF_ET_DYN_BASE (above),
1335 		 * leave a gap between .bss and brk.
1336 		 */
1337 		if (!brk_moved)
1338 			mm->brk = mm->start_brk = mm->brk + PAGE_SIZE;
1339 
1340 		mm->brk = mm->start_brk = arch_randomize_brk(mm);
1341 		brk_moved = true;
1342 	}
1343 
1344 #ifdef compat_brk_randomized
1345 	if (brk_moved)
1346 		current->brk_randomized = 1;
1347 #endif
1348 
1349 	if (current->personality & MMAP_PAGE_ZERO) {
1350 		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
1351 		   and some applications "depend" upon this behavior.
1352 		   Since we do not have the power to recompile these, we
1353 		   emulate the SVr4 behavior. Sigh. */
1354 		error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
1355 				MAP_FIXED | MAP_PRIVATE, 0);
1356 
1357 		retval = do_mseal(0, PAGE_SIZE, 0);
1358 		if (retval)
1359 			pr_warn_ratelimited("pid=%d, couldn't seal address 0, ret=%d.\n",
1360 					    task_pid_nr(current), retval);
1361 	}
1362 
1363 	regs = current_pt_regs();
1364 #ifdef ELF_PLAT_INIT
1365 	/*
1366 	 * The ABI may specify that certain registers be set up in special
1367 	 * ways (on i386 %edx is the address of a DT_FINI function, for
1368 	 * example).  In addition, it may also specify (eg, PowerPC64 ELF)
1369 	 * that the e_entry field is the address of the function descriptor
1370 	 * for the startup routine, rather than the address of the startup
1371 	 * routine itself.  This macro performs whatever initialization to
1372 	 * the regs structure is required as well as any relocations to the
1373 	 * function descriptor entries when executing dynamically linked apps.
1374 	 */
1375 	ELF_PLAT_INIT(regs, reloc_func_desc);
1376 #endif
1377 
1378 	finalize_exec(bprm);
1379 	START_THREAD(elf_ex, regs, elf_entry, bprm->p);
1380 	retval = 0;
1381 out:
1382 	return retval;
1383 
1384 	/* error cleanup */
1385 out_free_dentry:
1386 	kfree(interp_elf_ex);
1387 	kfree(interp_elf_phdata);
1388 out_free_file:
1389 	exe_file_allow_write_access(interpreter);
1390 	if (interpreter)
1391 		fput(interpreter);
1392 out_free_ph:
1393 	kfree(elf_phdata);
1394 	goto out;
1395 }
1396 
1397 #ifdef CONFIG_ELF_CORE
1398 /*
1399  * ELF core dumper
1400  *
1401  * Modelled on fs/exec.c:aout_core_dump()
1402  * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1403  */
1404 
1405 /* An ELF note in memory */
1406 struct memelfnote
1407 {
1408 	const char *name;
1409 	int type;
1410 	unsigned int datasz;
1411 	void *data;
1412 };
1413 
1414 static int notesize(struct memelfnote *en)
1415 {
1416 	int sz;
1417 
1418 	sz = sizeof(struct elf_note);
1419 	sz += roundup(strlen(en->name) + 1, 4);
1420 	sz += roundup(en->datasz, 4);
1421 
1422 	return sz;
1423 }
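/*
 * E.g. a note named "CORE" (5 bytes including the NUL, padded to 8) with a
 * hypothetical 336-byte descriptor (padded size unchanged) would occupy
 * sizeof(struct elf_note) + 8 + 336 bytes.
 */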
1424 
1425 static int writenote(struct memelfnote *men, struct coredump_params *cprm)
1426 {
1427 	struct elf_note en;
1428 	en.n_namesz = strlen(men->name) + 1;
1429 	en.n_descsz = men->datasz;
1430 	en.n_type = men->type;
1431 
1432 	return dump_emit(cprm, &en, sizeof(en)) &&
1433 	    dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
1434 	    dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
1435 }
1436 
1437 static void fill_elf_header(struct elfhdr *elf, int segs,
1438 			    u16 machine, u32 flags)
1439 {
1440 	memset(elf, 0, sizeof(*elf));
1441 
1442 	memcpy(elf->e_ident, ELFMAG, SELFMAG);
1443 	elf->e_ident[EI_CLASS] = ELF_CLASS;
1444 	elf->e_ident[EI_DATA] = ELF_DATA;
1445 	elf->e_ident[EI_VERSION] = EV_CURRENT;
1446 	elf->e_ident[EI_OSABI] = ELF_OSABI;
1447 
1448 	elf->e_type = ET_CORE;
1449 	elf->e_machine = machine;
1450 	elf->e_version = EV_CURRENT;
1451 	elf->e_phoff = sizeof(struct elfhdr);
1452 	elf->e_flags = flags;
1453 	elf->e_ehsize = sizeof(struct elfhdr);
1454 	elf->e_phentsize = sizeof(struct elf_phdr);
1455 	elf->e_phnum = segs;
1456 }
1457 
1458 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1459 {
1460 	phdr->p_type = PT_NOTE;
1461 	phdr->p_offset = offset;
1462 	phdr->p_vaddr = 0;
1463 	phdr->p_paddr = 0;
1464 	phdr->p_filesz = sz;
1465 	phdr->p_memsz = 0;
1466 	phdr->p_flags = 0;
1467 	phdr->p_align = 4;
1468 }
1469 
1470 static void __fill_note(struct memelfnote *note, const char *name, int type,
1471 			unsigned int sz, void *data)
1472 {
1473 	note->name = name;
1474 	note->type = type;
1475 	note->datasz = sz;
1476 	note->data = data;
1477 }
1478 
1479 #define fill_note(note, type, sz, data) \
1480 	__fill_note(note, NN_ ## type, NT_ ## type, sz, data)
1481 
1482 /*
1483  * fill up all the fields in prstatus from the given task struct, except
1484  * registers which need to be filled up separately.
1485  */
1486 static void fill_prstatus(struct elf_prstatus_common *prstatus,
1487 		struct task_struct *p, long signr)
1488 {
1489 	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1490 	prstatus->pr_sigpend = p->pending.signal.sig[0];
1491 	prstatus->pr_sighold = p->blocked.sig[0];
1492 	rcu_read_lock();
1493 	prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1494 	rcu_read_unlock();
1495 	prstatus->pr_pid = task_pid_vnr(p);
1496 	prstatus->pr_pgrp = task_pgrp_vnr(p);
1497 	prstatus->pr_sid = task_session_vnr(p);
1498 	if (thread_group_leader(p)) {
1499 		struct task_cputime cputime;
1500 
1501 		/*
1502 		 * This is the record for the group leader.  It shows the
1503 		 * group-wide total, not its individual thread total.
1504 		 */
1505 		thread_group_cputime(p, &cputime);
1506 		prstatus->pr_utime = ns_to_kernel_old_timeval(cputime.utime);
1507 		prstatus->pr_stime = ns_to_kernel_old_timeval(cputime.stime);
1508 	} else {
1509 		u64 utime, stime;
1510 
1511 		task_cputime(p, &utime, &stime);
1512 		prstatus->pr_utime = ns_to_kernel_old_timeval(utime);
1513 		prstatus->pr_stime = ns_to_kernel_old_timeval(stime);
1514 	}
1515 
1516 	prstatus->pr_cutime = ns_to_kernel_old_timeval(p->signal->cutime);
1517 	prstatus->pr_cstime = ns_to_kernel_old_timeval(p->signal->cstime);
1518 }
1519 
1520 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1521 		       struct mm_struct *mm)
1522 {
1523 	const struct cred *cred;
1524 	unsigned int i, len;
1525 	unsigned int state;
1526 
1527 	/* first copy the parameters from user space */
1528 	memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1529 
1530 	len = mm->arg_end - mm->arg_start;
1531 	if (len >= ELF_PRARGSZ)
1532 		len = ELF_PRARGSZ-1;
1533 	if (copy_from_user(&psinfo->pr_psargs,
1534 		           (const char __user *)mm->arg_start, len))
1535 		return -EFAULT;
1536 	for (i = 0; i < len; i++)
1537 		if (psinfo->pr_psargs[i] == 0)
1538 			psinfo->pr_psargs[i] = ' ';
1539 	psinfo->pr_psargs[len] = 0;
1540 
1541 	rcu_read_lock();
1542 	psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1543 	rcu_read_unlock();
1544 	psinfo->pr_pid = task_pid_vnr(p);
1545 	psinfo->pr_pgrp = task_pgrp_vnr(p);
1546 	psinfo->pr_sid = task_session_vnr(p);
1547 
1548 	state = READ_ONCE(p->__state);
1549 	i = state ? ffz(~state) + 1 : 0;
1550 	psinfo->pr_state = i;
1551 	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1552 	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1553 	psinfo->pr_nice = task_nice(p);
1554 	psinfo->pr_flag = p->flags;
1555 	rcu_read_lock();
1556 	cred = __task_cred(p);
1557 	SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
1558 	SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
1559 	rcu_read_unlock();
1560 	get_task_comm(psinfo->pr_fname, p);
1561 
1562 	return 0;
1563 }
1564 
1565 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1566 {
1567 	elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1568 	int i = 0;
1569 	do
1570 		i += 2;
1571 	while (auxv[i - 2] != AT_NULL);
1572 	fill_note(note, AUXV, i * sizeof(elf_addr_t), auxv);
1573 }
1574 
1575 static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1576 		const kernel_siginfo_t *siginfo)
1577 {
1578 	copy_siginfo_to_external(csigdata, siginfo);
1579 	fill_note(note, SIGINFO, sizeof(*csigdata), csigdata);
1580 }
1581 
1582 /*
1583  * Format of NT_FILE note:
1584  *
1585  * long count     -- how many files are mapped
1586  * long page_size -- units for file_ofs
1587  * array of [COUNT] elements of
1588  *   long start
1589  *   long end
1590  *   long file_ofs
1591  * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
1592  */
1593 static int fill_files_note(struct memelfnote *note, struct coredump_params *cprm)
1594 {
1595 	unsigned count, size, names_ofs, remaining, n;
1596 	user_long_t *data;
1597 	user_long_t *start_end_ofs;
1598 	char *name_base, *name_curpos;
1599 	int i;
1600 
1601 	/* *Estimated* file count and total data size needed */
1602 	count = cprm->vma_count;
1603 	if (count > UINT_MAX / 64)
1604 		return -EINVAL;
1605 	size = count * 64;
1606 
1607 	names_ofs = (2 + 3 * count) * sizeof(data[0]);
1608  alloc:
1609 	/* paranoia check */
1610 	if (size >= core_file_note_size_limit) {
1611 		pr_warn_once("coredump Note size too large: %u (does kernel.core_file_note_size_limit sysctl need adjustment?)\n",
1612 			      size);
1613 		return -EINVAL;
1614 	}
1615 	size = round_up(size, PAGE_SIZE);
1616 	/*
1617 	 * "size" can be 0 here legitimately.
1618 	 * If so, let the allocation fail with ENOMEM and omit the NT_FILE note, which would be empty anyway.
1619 	 */
1620 	data = kvmalloc(size, GFP_KERNEL);
1621 	if (ZERO_OR_NULL_PTR(data))
1622 		return -ENOMEM;
1623 
1624 	start_end_ofs = data + 2;
1625 	name_base = name_curpos = ((char *)data) + names_ofs;
1626 	remaining = size - names_ofs;
1627 	count = 0;
1628 	for (i = 0; i < cprm->vma_count; i++) {
1629 		struct core_vma_metadata *m = &cprm->vma_meta[i];
1630 		struct file *file;
1631 		const char *filename;
1632 
1633 		file = m->file;
1634 		if (!file)
1635 			continue;
1636 		filename = file_path(file, name_curpos, remaining);
1637 		if (IS_ERR(filename)) {
1638 			if (PTR_ERR(filename) == -ENAMETOOLONG) {
1639 				kvfree(data);
1640 				size = size * 5 / 4;
1641 				goto alloc;
1642 			}
1643 			continue;
1644 		}
1645 
1646 		/* file_path() fills at the end, move name down */
1647 		/* n = strlen(filename) + 1: */
1648 		n = (name_curpos + remaining) - filename;
1649 		remaining = filename - name_curpos;
1650 		memmove(name_curpos, filename, n);
1651 		name_curpos += n;
1652 
1653 		*start_end_ofs++ = m->start;
1654 		*start_end_ofs++ = m->end;
1655 		*start_end_ofs++ = m->pgoff;
1656 		count++;
1657 	}
1658 
1659 	/* Now we know exact count of files, can store it */
1660 	data[0] = count;
1661 	data[1] = PAGE_SIZE;
1662 	/*
1663 	 * The final count is usually less than cprm->vma_count,
1664 	 * so we need to move the filenames down.
1665 	 */
1666 	n = cprm->vma_count - count;
1667 	if (n != 0) {
1668 		unsigned shift_bytes = n * 3 * sizeof(data[0]);
1669 		memmove(name_base - shift_bytes, name_base,
1670 			name_curpos - name_base);
1671 		name_curpos -= shift_bytes;
1672 	}
1673 
1674 	size = name_curpos - (char *)data;
1675 	fill_note(note, FILE, size, data);
1676 	return 0;
1677 }
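/*
 * E.g. (hypothetical) a core dump with two file-backed VMAs produces an
 * NT_FILE payload of { 2, PAGE_SIZE, start0, end0, ofs0, start1, end1,
 * ofs1 } followed by the two NUL-terminated path strings.
 */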
1678 
1679 #include <linux/regset.h>
1680 
1681 struct elf_thread_core_info {
1682 	struct elf_thread_core_info *next;
1683 	struct task_struct *task;
1684 	struct elf_prstatus prstatus;
1685 	struct memelfnote notes[];
1686 };
1687 
1688 struct elf_note_info {
1689 	struct elf_thread_core_info *thread;
1690 	struct memelfnote psinfo;
1691 	struct memelfnote signote;
1692 	struct memelfnote auxv;
1693 	struct memelfnote files;
1694 	user_siginfo_t csigdata;
1695 	size_t size;
1696 	int thread_notes;
1697 };
1698 
1699 #ifdef CORE_DUMP_USE_REGSET
1700 /*
1701  * When a regset has a writeback hook, we call it on each thread before
1702  * dumping user memory.  On register window machines, this makes sure the
1703  * user memory backing the register data is up to date before we read it.
1704  */
1705 static void do_thread_regset_writeback(struct task_struct *task,
1706 				       const struct user_regset *regset)
1707 {
1708 	if (regset->writeback)
1709 		regset->writeback(task, regset, 1);
1710 }
1711 
1712 #ifndef PRSTATUS_SIZE
1713 #define PRSTATUS_SIZE sizeof(struct elf_prstatus)
1714 #endif
1715 
1716 #ifndef SET_PR_FPVALID
1717 #define SET_PR_FPVALID(S) ((S)->pr_fpvalid = 1)
1718 #endif
1719 
1720 static int fill_thread_core_info(struct elf_thread_core_info *t,
1721 				 const struct user_regset_view *view,
1722 				 long signr, struct elf_note_info *info)
1723 {
1724 	unsigned int note_iter, view_iter;
1725 
1726 	/*
1727 	 * NT_PRSTATUS is the one special case, because the regset data
1728 	 * goes into the pr_reg field inside the note contents, rather
1729 	 * than being the whole note contents.  We fill the regset in here.
1730 	 * We assume that regset 0 is NT_PRSTATUS.
1731 	 */
1732 	fill_prstatus(&t->prstatus.common, t->task, signr);
1733 	regset_get(t->task, &view->regsets[0],
1734 		   sizeof(t->prstatus.pr_reg), &t->prstatus.pr_reg);
1735 
1736 	fill_note(&t->notes[0], PRSTATUS, PRSTATUS_SIZE, &t->prstatus);
1737 	info->size += notesize(&t->notes[0]);
1738 
1739 	do_thread_regset_writeback(t->task, &view->regsets[0]);
1740 
1741 	/*
1742 	 * Every other regset might generate a note too.  Skip any regset
1743 	 * that has no core_note_type or is inactive.
1744 	 */
1745 	note_iter = 1;
1746 	for (view_iter = 1; view_iter < view->n; ++view_iter) {
1747 		const struct user_regset *regset = &view->regsets[view_iter];
1748 		int note_type = regset->core_note_type;
1749 		const char *note_name = regset->core_note_name;
1750 		bool is_fpreg = note_type == NT_PRFPREG;
1751 		void *data;
1752 		int ret;
1753 
1754 		do_thread_regset_writeback(t->task, regset);
1755 		if (!note_type) // not for coredumps
1756 			continue;
1757 		if (regset->active && regset->active(t->task, regset) <= 0)
1758 			continue;
1759 
1760 		ret = regset_get_alloc(t->task, regset, ~0U, &data);
1761 		if (ret < 0)
1762 			continue;
1763 
1764 		if (WARN_ON_ONCE(note_iter >= info->thread_notes))
1765 			break;
1766 
1767 		if (is_fpreg)
1768 			SET_PR_FPVALID(&t->prstatus);
1769 
1770 		/* There should be a note name, but if not, guess: */
1771 		if (WARN_ON_ONCE(!note_name))
1772 			note_name = "LINUX";
1773 		else
1774 			/* Warn on non-legacy-compatible names, for now. */
1775 			WARN_ON_ONCE(strcmp(note_name,
1776 					    is_fpreg ? "CORE" : "LINUX"));
1777 
1778 		__fill_note(&t->notes[note_iter], note_name, note_type,
1779 			    ret, data);
1780 
1781 		info->size += notesize(&t->notes[note_iter]);
1782 		note_iter++;
1783 	}
1784 
1785 	return 1;
1786 }
1787 #else
1788 static int fill_thread_core_info(struct elf_thread_core_info *t,
1789 				 const struct user_regset_view *view,
1790 				 long signr, struct elf_note_info *info)
1791 {
1792 	struct task_struct *p = t->task;
1793 	elf_fpregset_t *fpu;
1794 
1795 	fill_prstatus(&t->prstatus.common, p, signr);
1796 	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1797 
1798 	fill_note(&t->notes[0], PRSTATUS, sizeof(t->prstatus), &t->prstatus);
1799 	info->size += notesize(&t->notes[0]);
1800 
1801 	fpu = kzalloc(sizeof(elf_fpregset_t), GFP_KERNEL);
1802 	if (!fpu || !elf_core_copy_task_fpregs(p, fpu)) {
1803 		kfree(fpu);
1804 		return 1;
1805 	}
1806 
1807 	t->prstatus.pr_fpvalid = 1;
1808 	fill_note(&t->notes[1], PRFPREG, sizeof(*fpu), fpu);
1809 	info->size += notesize(&t->notes[1]);
1810 
1811 	return 1;
1812 }
1813 #endif
1814 
1815 static int fill_note_info(struct elfhdr *elf, int phdrs,
1816 			  struct elf_note_info *info,
1817 			  struct coredump_params *cprm)
1818 {
1819 	struct task_struct *dump_task = current;
1820 	const struct user_regset_view *view;
1821 	struct elf_thread_core_info *t;
1822 	struct elf_prpsinfo *psinfo;
1823 	struct core_thread *ct;
1824 	u16 machine;
1825 	u32 flags;
1826 
1827 	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1828 	if (!psinfo)
1829 		return 0;
1830 	fill_note(&info->psinfo, PRPSINFO, sizeof(*psinfo), psinfo);
1831 
1832 #ifdef CORE_DUMP_USE_REGSET
1833 	view = task_user_regset_view(dump_task);
1834 
1835 	/*
1836 	 * Figure out how many notes we're going to need for each thread.
1837 	 */
1838 	info->thread_notes = 0;
1839 	for (int i = 0; i < view->n; ++i)
1840 		if (view->regsets[i].core_note_type != 0)
1841 			++info->thread_notes;
1842 
1843 	/*
1844 	 * Sanity check.  We rely on regset 0 being NT_PRSTATUS,
1845 	 * since it is our one special case.
1846 	 */
1847 	if (unlikely(info->thread_notes == 0) ||
1848 	    unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1849 		WARN_ON(1);
1850 		return 0;
1851 	}
1852 
1853 	machine = view->e_machine;
1854 	flags = view->e_flags;
1855 #else
1856 	view = NULL;
1857 	info->thread_notes = 2;
1858 	machine = ELF_ARCH;
1859 	flags = ELF_CORE_EFLAGS;
1860 #endif
1861 
1862 	/*
1863 	 * Override the ELF e_flags with the value saved in the process's mm,
1864 	 * if the architecture needs that.
1865 	 */
1866 	flags = elf_coredump_get_mm_eflags(dump_task->mm, flags);
1867 
1868 	/*
1869 	 * Initialize the ELF file header.
1870 	 */
1871 	fill_elf_header(elf, phdrs, machine, flags);
1872 
1873 	/*
1874 	 * Allocate a structure for each thread.
1875 	 */
1876 	info->thread = kzalloc(struct_size(info->thread, notes, info->thread_notes),
1877 			       GFP_KERNEL);
1878 	if (unlikely(!info->thread))
1879 		return 0;
1880 
1881 	info->thread->task = dump_task;
1882 	for (ct = dump_task->signal->core_state->dumper.next; ct; ct = ct->next) {
1883 		t = kzalloc(struct_size(t, notes, info->thread_notes),
1884 			    GFP_KERNEL);
1885 		if (unlikely(!t))
1886 			return 0;
1887 
1888 		t->task = ct->task;
1889 		t->next = info->thread->next;
1890 		info->thread->next = t;
1891 	}
1892 
1893 	/*
1894 	 * Now fill in each thread's information.
1895 	 */
1896 	for (t = info->thread; t != NULL; t = t->next)
1897 		if (!fill_thread_core_info(t, view, cprm->siginfo->si_signo, info))
1898 			return 0;
1899 
1900 	/*
1901 	 * Fill in the process-wide notes.
1902 	 */
1903 	fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1904 	info->size += notesize(&info->psinfo);
1905 
1906 	fill_siginfo_note(&info->signote, &info->csigdata, cprm->siginfo);
1907 	info->size += notesize(&info->signote);
1908 
1909 	fill_auxv_note(&info->auxv, current->mm);
1910 	info->size += notesize(&info->auxv);
1911 
1912 	if (fill_files_note(&info->files, cprm) == 0)
1913 		info->size += notesize(&info->files);
1914 
1915 	return 1;
1916 }
1917 
1918 /*
1919  * Write all the notes for each thread.  When writing the first thread, the
1920  * process-wide notes are interleaved after the first thread-specific note.
1921  */
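/*
 * For example, for two threads T0 and T1 whose regset view yields
 * NT_PRSTATUS and NT_PRFPREG, the emitted order is:
 *
 *   PRSTATUS(T0), PRPSINFO, SIGINFO, AUXV, [FILE,]
 *   PRFPREG(T0), PRSTATUS(T1), PRFPREG(T1)
 *
 * (FILE appears only when fill_files_note() succeeded.)
 */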
1922 static int write_note_info(struct elf_note_info *info,
1923 			   struct coredump_params *cprm)
1924 {
1925 	bool first = true;
1926 	struct elf_thread_core_info *t = info->thread;
1927 
1928 	do {
1929 		int i;
1930 
1931 		if (!writenote(&t->notes[0], cprm))
1932 			return 0;
1933 
1934 		if (first && !writenote(&info->psinfo, cprm))
1935 			return 0;
1936 		if (first && !writenote(&info->signote, cprm))
1937 			return 0;
1938 		if (first && !writenote(&info->auxv, cprm))
1939 			return 0;
1940 		if (first && info->files.data &&
1941 				!writenote(&info->files, cprm))
1942 			return 0;
1943 
1944 		for (i = 1; i < info->thread_notes; ++i)
1945 			if (t->notes[i].data &&
1946 			    !writenote(&t->notes[i], cprm))
1947 				return 0;
1948 
1949 		first = false;
1950 		t = t->next;
1951 	} while (t);
1952 
1953 	return 1;
1954 }
1955 
1956 static void free_note_info(struct elf_note_info *info)
1957 {
1958 	struct elf_thread_core_info *threads = info->thread;
1959 	while (threads) {
1960 		unsigned int i;
1961 		struct elf_thread_core_info *t = threads;
1962 		threads = t->next;
1963 		WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1964 		for (i = 1; i < info->thread_notes; ++i)
1965 			kvfree(t->notes[i].data);
1966 		kfree(t);
1967 	}
1968 	kfree(info->psinfo.data);
1969 	kvfree(info->files.data);
1970 }
1971 
1972 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
1973 			     elf_addr_t e_shoff, int segs)
1974 {
1975 	elf->e_shoff = e_shoff;
1976 	elf->e_shentsize = sizeof(*shdr4extnum);
1977 	elf->e_shnum = 1;
1978 	elf->e_shstrndx = SHN_UNDEF;
1979 
1980 	memset(shdr4extnum, 0, sizeof(*shdr4extnum));
1981 
1982 	shdr4extnum->sh_type = SHT_NULL;
1983 	shdr4extnum->sh_size = elf->e_shnum;
1984 	shdr4extnum->sh_link = elf->e_shstrndx;
1985 	shdr4extnum->sh_info = segs;
1986 }
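
/*
 * A minimal illustrative sketch (not kernel code) of the consumer side of
 * extended numbering: given the ELF header and section header 0 as written
 * by fill_extnum_info(), recover the real segment count.  The function
 * name and the caller-supplied pointers are hypothetical.
 */
#if 0
static unsigned int real_phnum(const struct elfhdr *ehdr,
			       const struct elf_shdr *shdr0)
{
	if (ehdr->e_phnum != PN_XNUM)
		return ehdr->e_phnum;
	/* Extended numbering: shdr 0's sh_info holds the true count. */
	return shdr0->sh_info;
}
#endif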
1987 
1988 /*
1989  * Actual dumper
1990  *
1991  * This is a two-pass process; first we find the offsets of the bits,
1992  * and then they are actually written out.  If we hit the core-file size
1993  * limit, the dump is simply truncated.
1994  */
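/*
 * A sketch of the resulting file layout, using the offsets computed below:
 *
 *   ELF header
 *   program headers (PT_NOTE first, then one PT_LOAD per VMA, then extras)
 *   note data
 *   <padding up to ELF_EXEC_PAGESIZE>        <- dataoff
 *   PT_LOAD segment contents, in VMA order
 *   extra arch-specific data, if any
 *   extended-numbering section header        <- e_shoff, only when segs > PN_XNUM
 */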
1995 static int elf_core_dump(struct coredump_params *cprm)
1996 {
1997 	int has_dumped = 0;
1998 	int segs, i;
1999 	struct elfhdr elf;
2000 	loff_t offset = 0, dataoff;
2001 	struct elf_note_info info = { };
2002 	struct elf_phdr *phdr4note = NULL;
2003 	struct elf_shdr *shdr4extnum = NULL;
2004 	Elf_Half e_phnum;
2005 	elf_addr_t e_shoff;
2006 
2007 	/*
2008 	 * The number of segments is recorded in the ELF header as a 16-bit value.
2009 	 * Please check the DEFAULT_MAX_MAP_COUNT definition when modifying this.
2010 	 */
2011 	segs = cprm->vma_count + elf_core_extra_phdrs(cprm);
2012 
2013 	/* for notes section */
2014 	segs++;
2015 
2016 	/* If segs > PN_XNUM(0xffff), e_phnum would overflow.  To avoid
2017 	 * this, the kernel supports extended numbering.  Have a look at
2018 	 * include/linux/elf.h for further information. */
2019 	e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2020 
2021 	/*
2022 	 * Collect all the non-memory information about the process for the
2023 	 * notes.  This also sets up the file header.
2024 	 */
2025 	if (!fill_note_info(&elf, e_phnum, &info, cprm))
2026 		goto end_coredump;
2027 
2028 	has_dumped = 1;
2029 
2030 	offset += sizeof(elf);				/* ELF header */
2031 	offset += segs * sizeof(struct elf_phdr);	/* Program headers */
2032 
2033 	/* Write notes phdr entry */
2034 	{
2035 		size_t sz = info.size;
2036 
2037 		/* For cell spufs and x86 xstate */
2038 		sz += elf_coredump_extra_notes_size();
2039 
2040 		phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2041 		if (!phdr4note)
2042 			goto end_coredump;
2043 
2044 		fill_elf_note_phdr(phdr4note, sz, offset);
2045 		offset += sz;
2046 	}
2047 
2048 	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2049 
2050 	offset += cprm->vma_data_size;
2051 	offset += elf_core_extra_data_size(cprm);
2052 	e_shoff = offset;
2053 
2054 	if (e_phnum == PN_XNUM) {
2055 		shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2056 		if (!shdr4extnum)
2057 			goto end_coredump;
2058 		fill_extnum_info(&elf, shdr4extnum, e_shoff, segs);
2059 	}
2060 
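	/*
	 * Rewind to the start of the segment data: the PT_LOAD loop below
	 * reuses "offset" to assign each segment's p_offset.
	 */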
2061 	offset = dataoff;
2062 
2063 	if (!dump_emit(cprm, &elf, sizeof(elf)))
2064 		goto end_coredump;
2065 
2066 	if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
2067 		goto end_coredump;
2068 
2069 	/* Write program headers for segments dump */
2070 	for (i = 0; i < cprm->vma_count; i++) {
2071 		struct core_vma_metadata *meta = cprm->vma_meta + i;
2072 		struct elf_phdr phdr;
2073 
2074 		phdr.p_type = PT_LOAD;
2075 		phdr.p_offset = offset;
2076 		phdr.p_vaddr = meta->start;
2077 		phdr.p_paddr = 0;
2078 		phdr.p_filesz = meta->dump_size;
2079 		phdr.p_memsz = meta->end - meta->start;
2080 		offset += phdr.p_filesz;
2081 		phdr.p_flags = 0;
2082 		if (meta->flags & VM_READ)
2083 			phdr.p_flags |= PF_R;
2084 		if (meta->flags & VM_WRITE)
2085 			phdr.p_flags |= PF_W;
2086 		if (meta->flags & VM_EXEC)
2087 			phdr.p_flags |= PF_X;
2088 		phdr.p_align = ELF_EXEC_PAGESIZE;
2089 
2090 		if (!dump_emit(cprm, &phdr, sizeof(phdr)))
2091 			goto end_coredump;
2092 	}
2093 
2094 	if (!elf_core_write_extra_phdrs(cprm, offset))
2095 		goto end_coredump;
2096 
2097 	/* write out the notes section */
2098 	if (!write_note_info(&info, cprm))
2099 		goto end_coredump;
2100 
2101 	/* For cell spufs and x86 xstate */
2102 	if (elf_coredump_extra_notes_write(cprm))
2103 		goto end_coredump;
2104 
2105 	/* Align to page */
2106 	dump_skip_to(cprm, dataoff);
2107 
2108 	for (i = 0; i < cprm->vma_count; i++) {
2109 		struct core_vma_metadata *meta = cprm->vma_meta + i;
2110 
2111 		if (!dump_user_range(cprm, meta->start, meta->dump_size))
2112 			goto end_coredump;
2113 	}
2114 
2115 	if (!elf_core_write_extra_data(cprm))
2116 		goto end_coredump;
2117 
2118 	if (e_phnum == PN_XNUM) {
2119 		if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
2120 			goto end_coredump;
2121 	}
2122 
2123 end_coredump:
2124 	free_note_info(&info);
2125 	kfree(shdr4extnum);
2126 	kfree(phdr4note);
2127 	return has_dumped;
2128 }
2129 
2130 #endif		/* CONFIG_ELF_CORE */
2131 
2132 static int __init init_elf_binfmt(void)
2133 {
2134 	register_binfmt(&elf_format);
2135 	return 0;
2136 }
2137 
2138 static void __exit exit_elf_binfmt(void)
2139 {
2140 	/* Remove the ELF loader. */
2141 	unregister_binfmt(&elf_format);
2142 }
2143 
2144 core_initcall(init_elf_binfmt);
2145 module_exit(exit_elf_binfmt);
2146 
2147 #ifdef CONFIG_BINFMT_ELF_KUNIT_TEST
2148 #include "tests/binfmt_elf_kunit.c"
2149 #endif
2150