xref: /linux/fs/binfmt_elf.c (revision 7a9b709e7cc5ce1ffb84ce07bf6d157e1de758df)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * linux/fs/binfmt_elf.c
4  *
5  * These are the functions used to load ELF format executables as used
6  * on SVr4 machines.  Information on the format may be found in the book
7  * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
8  * Tools".
9  *
10  * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
11  */
12 
13 #include <linux/module.h>
14 #include <linux/kernel.h>
15 #include <linux/fs.h>
16 #include <linux/log2.h>
17 #include <linux/mm.h>
18 #include <linux/mman.h>
19 #include <linux/errno.h>
20 #include <linux/signal.h>
21 #include <linux/binfmts.h>
22 #include <linux/string.h>
23 #include <linux/file.h>
24 #include <linux/slab.h>
25 #include <linux/personality.h>
26 #include <linux/elfcore.h>
27 #include <linux/init.h>
28 #include <linux/highuid.h>
29 #include <linux/compiler.h>
30 #include <linux/highmem.h>
31 #include <linux/hugetlb.h>
32 #include <linux/pagemap.h>
33 #include <linux/vmalloc.h>
34 #include <linux/security.h>
35 #include <linux/random.h>
36 #include <linux/elf.h>
37 #include <linux/elf-randomize.h>
38 #include <linux/utsname.h>
39 #include <linux/coredump.h>
40 #include <linux/sched.h>
41 #include <linux/sched/coredump.h>
42 #include <linux/sched/task_stack.h>
43 #include <linux/sched/cputime.h>
44 #include <linux/sizes.h>
45 #include <linux/types.h>
46 #include <linux/cred.h>
47 #include <linux/dax.h>
48 #include <linux/uaccess.h>
49 #include <linux/rseq.h>
50 #include <asm/param.h>
51 #include <asm/page.h>
52 
53 #ifndef ELF_COMPAT
54 #define ELF_COMPAT 0
55 #endif
56 
57 #ifndef user_long_t
58 #define user_long_t long
59 #endif
60 #ifndef user_siginfo_t
61 #define user_siginfo_t siginfo_t
62 #endif
63 
64 /* That's for binfmt_elf_fdpic to deal with */
65 #ifndef elf_check_fdpic
66 #define elf_check_fdpic(ex) false
67 #endif
68 
69 static int load_elf_binary(struct linux_binprm *bprm);
70 
71 #ifdef CONFIG_USELIB
72 static int load_elf_library(struct file *);
73 #else
74 #define load_elf_library NULL
75 #endif
76 
77 /*
78  * If we don't support core dumping, then supply a NULL so we
79  * don't even try.
80  */
81 #ifdef CONFIG_ELF_CORE
82 static int elf_core_dump(struct coredump_params *cprm);
83 #else
84 #define elf_core_dump	NULL
85 #endif
86 
87 #if ELF_EXEC_PAGESIZE > PAGE_SIZE
88 #define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
89 #else
90 #define ELF_MIN_ALIGN	PAGE_SIZE
91 #endif
92 
93 #ifndef ELF_CORE_EFLAGS
94 #define ELF_CORE_EFLAGS	0
95 #endif
96 
97 #define ELF_PAGESTART(_v) ((_v) & ~(int)(ELF_MIN_ALIGN-1))
98 #define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
99 #define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
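/*
 * For illustration, assuming ELF_MIN_ALIGN == 4096 (the common case where
 * ELF_EXEC_PAGESIZE <= PAGE_SIZE == 4K):
 *
 *   ELF_PAGESTART(0x2345)  == 0x2000   (round down to the page start)
 *   ELF_PAGEOFFSET(0x2345) == 0x345    (offset within the page)
 *   ELF_PAGEALIGN(0x2345)  == 0x3000   (round up to the next page boundary)
 */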
100 
101 static struct linux_binfmt elf_format = {
102 	.module		= THIS_MODULE,
103 	.load_binary	= load_elf_binary,
104 	.load_shlib	= load_elf_library,
105 #ifdef CONFIG_COREDUMP
106 	.core_dump	= elf_core_dump,
107 	.min_coredump	= ELF_EXEC_PAGESIZE,
108 #endif
109 };
110 
111 #define BAD_ADDR(x) (unlikely((unsigned long)(x) >= TASK_SIZE))
112 
113 /*
114  * We need to explicitly zero any trailing portion of the page that follows
115  * p_filesz when it ends before the page ends (e.g. bss), otherwise this
116  * memory will contain the junk from the file that should not be present.
117  */
118 static int padzero(unsigned long address)
119 {
120 	unsigned long nbyte;
121 
122 	nbyte = ELF_PAGEOFFSET(address);
123 	if (nbyte) {
124 		nbyte = ELF_MIN_ALIGN - nbyte;
125 		if (clear_user((void __user *)address, nbyte))
126 			return -EFAULT;
127 	}
128 	return 0;
129 }
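/*
 * Example (hypothetical address, 4K ELF_MIN_ALIGN): if a segment's
 * file-backed data ends at 0x804a123, padzero(0x804a123) clears the 0xedd
 * bytes from 0x804a123 up to the page boundary at 0x804b000, so stale file
 * bytes never leak into the .bss part of that page.
 */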
130 
131 /* Let's use some macros to make this stack manipulation a little clearer */
132 #ifdef CONFIG_STACK_GROWSUP
133 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
134 #define STACK_ROUND(sp, items) \
135 	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
136 #define STACK_ALLOC(sp, len) ({ \
137 	elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
138 	old_sp; })
139 #else
140 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
141 #define STACK_ROUND(sp, items) \
142 	(((unsigned long) (sp - items)) &~ 15UL)
143 #define STACK_ALLOC(sp, len) (sp -= len)
144 #endif
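/*
 * Sketch of the (common) grow-down case with hypothetical values: if
 * sp == 0x7fff1000, then STACK_ALLOC(sp, 16) lowers sp to 0x7fff0ff0 and
 * yields that new value, STACK_ADD(sp, 4) points four elf_addr_t slots
 * below sp, and STACK_ROUND(sp, items) rounds the final address down to a
 * 16-byte boundary as most ABIs require.
 */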
145 
146 #ifndef ELF_BASE_PLATFORM
147 /*
148  * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
149  * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
150  * will be copied to the user stack in the same manner as AT_PLATFORM.
151  */
152 #define ELF_BASE_PLATFORM NULL
153 #endif
154 
155 static int
156 create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec,
157 		unsigned long interp_load_addr,
158 		unsigned long e_entry, unsigned long phdr_addr)
159 {
160 	struct mm_struct *mm = current->mm;
161 	unsigned long p = bprm->p;
162 	int argc = bprm->argc;
163 	int envc = bprm->envc;
164 	elf_addr_t __user *sp;
165 	elf_addr_t __user *u_platform;
166 	elf_addr_t __user *u_base_platform;
167 	elf_addr_t __user *u_rand_bytes;
168 	const char *k_platform = ELF_PLATFORM;
169 	const char *k_base_platform = ELF_BASE_PLATFORM;
170 	unsigned char k_rand_bytes[16];
171 	int items;
172 	elf_addr_t *elf_info;
173 	elf_addr_t flags = 0;
174 	int ei_index;
175 	const struct cred *cred = current_cred();
176 	struct vm_area_struct *vma;
177 
178 	/*
179 	 * In some cases (e.g. Hyper-Threading), we want to avoid L1
180 	 * evictions by the processes running on the same package. One
181 	 * thing we can do is to shuffle the initial stack for them.
182 	 */
183 
184 	p = arch_align_stack(p);
185 
186 	/*
187 	 * If this architecture has a platform capability string, copy it
188 	 * to userspace.  In some cases (Sparc), this info is impossible
189 	 * for userspace to get any other way, in others (i386) it is
190 	 * merely difficult.
191 	 */
192 	u_platform = NULL;
193 	if (k_platform) {
194 		size_t len = strlen(k_platform) + 1;
195 
196 		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
197 		if (copy_to_user(u_platform, k_platform, len))
198 			return -EFAULT;
199 	}
200 
201 	/*
202 	 * If this architecture has a "base" platform capability
203 	 * string, copy it to userspace.
204 	 */
205 	u_base_platform = NULL;
206 	if (k_base_platform) {
207 		size_t len = strlen(k_base_platform) + 1;
208 
209 		u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
210 		if (copy_to_user(u_base_platform, k_base_platform, len))
211 			return -EFAULT;
212 	}
213 
214 	/*
215 	 * Generate 16 random bytes for userspace PRNG seeding.
216 	 */
217 	get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
218 	u_rand_bytes = (elf_addr_t __user *)
219 		       STACK_ALLOC(p, sizeof(k_rand_bytes));
220 	if (copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
221 		return -EFAULT;
222 
223 	/* Create the ELF interpreter info */
224 	elf_info = (elf_addr_t *)mm->saved_auxv;
225 	/* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
226 #define NEW_AUX_ENT(id, val) \
227 	do { \
228 		*elf_info++ = id; \
229 		*elf_info++ = val; \
230 	} while (0)
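	/*
	 * The resulting saved_auxv is a flat array of (id, value) pairs,
	 * e.g. { AT_HWCAP, <hwcap>, AT_PAGESZ, 4096, ..., AT_NULL, 0 },
	 * terminated by the AT_NULL entry (values here are illustrative).
	 */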
231 
232 #ifdef ARCH_DLINFO
233 	/*
234 	 * ARCH_DLINFO must come first so PPC can do its special alignment of
235 	 * AUXV.
236 	 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
237 	 * ARCH_DLINFO changes
238 	 */
239 	ARCH_DLINFO;
240 #endif
241 	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
242 	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
243 	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
244 	NEW_AUX_ENT(AT_PHDR, phdr_addr);
245 	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
246 	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
247 	NEW_AUX_ENT(AT_BASE, interp_load_addr);
248 	if (bprm->interp_flags & BINPRM_FLAGS_PRESERVE_ARGV0)
249 		flags |= AT_FLAGS_PRESERVE_ARGV0;
250 	NEW_AUX_ENT(AT_FLAGS, flags);
251 	NEW_AUX_ENT(AT_ENTRY, e_entry);
252 	NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
253 	NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
254 	NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
255 	NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
256 	NEW_AUX_ENT(AT_SECURE, bprm->secureexec);
257 	NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
258 #ifdef ELF_HWCAP2
259 	NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
260 #endif
261 #ifdef ELF_HWCAP3
262 	NEW_AUX_ENT(AT_HWCAP3, ELF_HWCAP3);
263 #endif
264 #ifdef ELF_HWCAP4
265 	NEW_AUX_ENT(AT_HWCAP4, ELF_HWCAP4);
266 #endif
267 	NEW_AUX_ENT(AT_EXECFN, bprm->exec);
268 	if (k_platform) {
269 		NEW_AUX_ENT(AT_PLATFORM,
270 			    (elf_addr_t)(unsigned long)u_platform);
271 	}
272 	if (k_base_platform) {
273 		NEW_AUX_ENT(AT_BASE_PLATFORM,
274 			    (elf_addr_t)(unsigned long)u_base_platform);
275 	}
276 	if (bprm->have_execfd) {
277 		NEW_AUX_ENT(AT_EXECFD, bprm->execfd);
278 	}
279 #ifdef CONFIG_RSEQ
280 	NEW_AUX_ENT(AT_RSEQ_FEATURE_SIZE, offsetof(struct rseq, end));
281 	NEW_AUX_ENT(AT_RSEQ_ALIGN, __alignof__(struct rseq));
282 #endif
283 #undef NEW_AUX_ENT
284 	/* AT_NULL is zero; clear the rest too */
285 	memset(elf_info, 0, (char *)mm->saved_auxv +
286 			sizeof(mm->saved_auxv) - (char *)elf_info);
287 
288 	/* And advance past the AT_NULL entry.  */
289 	elf_info += 2;
290 
291 	ei_index = elf_info - (elf_addr_t *)mm->saved_auxv;
292 	sp = STACK_ADD(p, ei_index);
293 
294 	items = (argc + 1) + (envc + 1) + 1;
295 	bprm->p = STACK_ROUND(sp, items);
296 
297 	/* Point sp at the lowest address on the stack */
298 #ifdef CONFIG_STACK_GROWSUP
299 	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
300 	bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
301 #else
302 	sp = (elf_addr_t __user *)bprm->p;
303 #endif
304 
305 
306 	/*
307 	 * Grow the stack manually; some architectures have a limit on how
308 	 * far ahead a user-space access may be in order to grow the stack.
309 	 */
310 	if (mmap_write_lock_killable(mm))
311 		return -EINTR;
312 	vma = find_extend_vma_locked(mm, bprm->p);
313 	mmap_write_unlock(mm);
314 	if (!vma)
315 		return -EFAULT;
316 
317 	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
318 	if (put_user(argc, sp++))
319 		return -EFAULT;
320 
321 	/* Populate list of argv pointers back to argv strings. */
322 	p = mm->arg_end = mm->arg_start;
323 	while (argc-- > 0) {
324 		size_t len;
325 		if (put_user((elf_addr_t)p, sp++))
326 			return -EFAULT;
327 		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
328 		if (!len || len > MAX_ARG_STRLEN)
329 			return -EINVAL;
330 		p += len;
331 	}
332 	if (put_user(0, sp++))
333 		return -EFAULT;
334 	mm->arg_end = p;
335 
336 	/* Populate list of envp pointers back to envp strings. */
337 	mm->env_end = mm->env_start = p;
338 	while (envc-- > 0) {
339 		size_t len;
340 		if (put_user((elf_addr_t)p, sp++))
341 			return -EFAULT;
342 		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
343 		if (!len || len > MAX_ARG_STRLEN)
344 			return -EINVAL;
345 		p += len;
346 	}
347 	if (put_user(0, sp++))
348 		return -EFAULT;
349 	mm->env_end = p;
350 
351 	/* Put the elf_info on the stack in the right place.  */
352 	if (copy_to_user(sp, mm->saved_auxv, ei_index * sizeof(elf_addr_t)))
353 		return -EFAULT;
354 	return 0;
355 }
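/*
 * Rough sketch of the initial stack that create_elf_tables() builds, from
 * the lowest address (what sp points at when the new program starts) up:
 *
 *   argc
 *   argv[0] ... argv[argc-1], NULL
 *   envp[0] ... envp[envc-1], NULL
 *   auxv pairs (AT_*, value) terminated by AT_NULL
 *   ... argument/environment strings, platform strings, random bytes ...
 */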
356 
357 /*
358  * Map "eppnt->p_filesz" bytes from "filep" offset "eppnt->p_offset"
359  * into memory at "addr". (Note that p_filesz is rounded up to the
360  * next page, so any extra bytes from the file must be wiped.)
361  */
362 static unsigned long elf_map(struct file *filep, unsigned long addr,
363 		const struct elf_phdr *eppnt, int prot, int type,
364 		unsigned long total_size)
365 {
366 	unsigned long map_addr;
367 	unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
368 	unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
369 	addr = ELF_PAGESTART(addr);
370 	size = ELF_PAGEALIGN(size);
371 
372 	/* mmap() will return -EINVAL if given a zero size, but a
373 	 * segment with zero filesize is perfectly valid */
374 	if (!size)
375 		return addr;
376 
377 	/*
378 	 * total_size is the size of the ELF (interpreter) image.
379 	 * The _first_ mmap needs to know the full size, otherwise
380 	 * randomization might put this image into an overlapping
381 	 * position with the ELF binary image (since size < total_size).
382 	 * So we first map the 'big' image - and unmap the remainder at
383 	 * the end (that unmap is needed for ELF images with holes).
384 	 */
385 	if (total_size) {
386 		total_size = ELF_PAGEALIGN(total_size);
387 		map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
388 		if (!BAD_ADDR(map_addr))
389 			vm_munmap(map_addr+size, total_size-size);
390 	} else
391 		map_addr = vm_mmap(filep, addr, size, prot, type, off);
392 
393 	if ((type & MAP_FIXED_NOREPLACE) &&
394 	    PTR_ERR((void *)map_addr) == -EEXIST)
395 		pr_info("%d (%s): Uhuuh, elf segment at %px requested but the memory is mapped already\n",
396 			task_pid_nr(current), current->comm, (void *)addr);
397 
398 	return map_addr;
399 }
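/*
 * Worked example with hypothetical values and a 4K ELF_MIN_ALIGN: for a
 * segment with p_vaddr = 0x401234, p_offset = 0x1234 and p_filesz = 0x800,
 * ELF_PAGEOFFSET(p_vaddr) is 0x234, so elf_map() maps 0xa34 bytes rounded
 * up to 0x1000 from file offset 0x1000 at address 0x401000; the segment
 * bytes then land at 0x401234 exactly as the program header asked.
 */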
400 
401 /*
402  * Map "eppnt->p_filesz" bytes from "filep" offset "eppnt->p_offset"
403  * into memory at "addr". Memory from "p_filesz" through "p_memsz"
404  * rounded up to the next page is zeroed.
405  */
406 static unsigned long elf_load(struct file *filep, unsigned long addr,
407 		const struct elf_phdr *eppnt, int prot, int type,
408 		unsigned long total_size)
409 {
410 	unsigned long zero_start, zero_end;
411 	unsigned long map_addr;
412 
413 	if (eppnt->p_filesz) {
414 		map_addr = elf_map(filep, addr, eppnt, prot, type, total_size);
415 		if (BAD_ADDR(map_addr))
416 			return map_addr;
417 		if (eppnt->p_memsz > eppnt->p_filesz) {
418 			zero_start = map_addr + ELF_PAGEOFFSET(eppnt->p_vaddr) +
419 				eppnt->p_filesz;
420 			zero_end = map_addr + ELF_PAGEOFFSET(eppnt->p_vaddr) +
421 				eppnt->p_memsz;
422 
423 			/*
424 			 * Zero the end of the last mapped page but ignore
425 			 * any errors if the segment isn't writable.
426 			 */
427 			if (padzero(zero_start) && (prot & PROT_WRITE))
428 				return -EFAULT;
429 		}
430 	} else {
431 		map_addr = zero_start = ELF_PAGESTART(addr);
432 		zero_end = zero_start + ELF_PAGEOFFSET(eppnt->p_vaddr) +
433 			eppnt->p_memsz;
434 	}
435 	if (eppnt->p_memsz > eppnt->p_filesz) {
436 		/*
437 		 * Map the last of the segment.
438 		 * If the header is requesting these pages to be
439 		 * executable, honour that (ppc32 needs this).
440 		 */
441 		int error;
442 
443 		zero_start = ELF_PAGEALIGN(zero_start);
444 		zero_end = ELF_PAGEALIGN(zero_end);
445 
446 		error = vm_brk_flags(zero_start, zero_end - zero_start,
447 				     prot & PROT_EXEC ? VM_EXEC : 0);
448 		if (error)
449 			map_addr = error;
450 	}
451 	return map_addr;
452 }
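/*
 * Example (hypothetical, 4K pages, page-aligned p_vaddr): a PT_LOAD with
 * p_filesz = 0x100 and p_memsz = 0x3000 gets its first page mapped from the
 * file, the tail of that page (0x100..0xfff) cleared via padzero(), and the
 * remaining two pages mapped anonymously through vm_brk_flags() - the
 * classic .bss case.
 */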
453 
454 
455 static unsigned long total_mapping_size(const struct elf_phdr *phdr, int nr)
456 {
457 	elf_addr_t min_addr = -1;
458 	elf_addr_t max_addr = 0;
459 	bool pt_load = false;
460 	int i;
461 
462 	for (i = 0; i < nr; i++) {
463 		if (phdr[i].p_type == PT_LOAD) {
464 			min_addr = min(min_addr, ELF_PAGESTART(phdr[i].p_vaddr));
465 			max_addr = max(max_addr, phdr[i].p_vaddr + phdr[i].p_memsz);
466 			pt_load = true;
467 		}
468 	}
469 	return pt_load ? (max_addr - min_addr) : 0;
470 }
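/*
 * Example (hypothetical program headers): PT_LOAD segments at p_vaddr
 * 0x400000 (p_memsz 0x1000) and 0x600000 (p_memsz 0x2000) give a span of
 * 0x602000 - 0x400000 = 0x202000 bytes; with no PT_LOAD entries at all,
 * the function returns 0.
 */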
471 
472 static int elf_read(struct file *file, void *buf, size_t len, loff_t pos)
473 {
474 	ssize_t rv;
475 
476 	rv = kernel_read(file, buf, len, &pos);
477 	if (unlikely(rv != len)) {
478 		return (rv < 0) ? rv : -EIO;
479 	}
480 	return 0;
481 }
482 
483 static unsigned long maximum_alignment(struct elf_phdr *cmds, int nr)
484 {
485 	unsigned long alignment = 0;
486 	int i;
487 
488 	for (i = 0; i < nr; i++) {
489 		if (cmds[i].p_type == PT_LOAD) {
490 			unsigned long p_align = cmds[i].p_align;
491 
492 			/* skip non-power of two alignments as invalid */
493 			if (!is_power_of_2(p_align))
494 				continue;
495 			alignment = max(alignment, p_align);
496 		}
497 	}
498 
499 	/* ensure we align to at least one page */
500 	return ELF_PAGEALIGN(alignment);
501 }
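/*
 * Example (hypothetical values): PT_LOAD p_align values of 0x1000 and
 * 0x200000 yield 0x200000 (a 2 MiB request, e.g. for huge-page-friendly
 * mappings); a non-power-of-two p_align is skipped, and a small non-zero
 * alignment is rounded up to one ELF page by the final ELF_PAGEALIGN().
 */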
502 
503 /**
504  * load_elf_phdrs() - load ELF program headers
505  * @elf_ex:   ELF header of the binary whose program headers should be loaded
506  * @elf_file: the opened ELF binary file
507  *
508  * Loads ELF program headers from the binary file elf_file, which has the ELF
509  * header pointed to by elf_ex, into a newly allocated array. The caller is
510  * responsible for freeing the allocated data. Returns NULL upon failure.
511  */
512 static struct elf_phdr *load_elf_phdrs(const struct elfhdr *elf_ex,
513 				       struct file *elf_file)
514 {
515 	struct elf_phdr *elf_phdata = NULL;
516 	int retval = -1;
517 	unsigned int size;
518 
519 	/*
520 	 * If the size of this structure has changed, then punt, since
521 	 * we will be doing the wrong thing.
522 	 */
523 	if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
524 		goto out;
525 
526 	/* Sanity check the number of program headers... */
527 	/* ...and their total size. */
528 	size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
529 	if (size == 0 || size > 65536 || size > ELF_MIN_ALIGN)
530 		goto out;
531 
532 	elf_phdata = kmalloc(size, GFP_KERNEL);
533 	if (!elf_phdata)
534 		goto out;
535 
536 	/* Read in the program headers */
537 	retval = elf_read(elf_file, elf_phdata, size, elf_ex->e_phoff);
538 
539 out:
540 	if (retval) {
541 		kfree(elf_phdata);
542 		elf_phdata = NULL;
543 	}
544 	return elf_phdata;
545 }
546 
547 #ifndef CONFIG_ARCH_BINFMT_ELF_STATE
548 
549 /**
550  * struct arch_elf_state - arch-specific ELF loading state
551  *
552  * This structure is used to preserve architecture specific data during
553  * the loading of an ELF file, throughout the checking of architecture
554  * specific ELF headers & through to the point where the ELF load is
555  * known to be proceeding (ie. SET_PERSONALITY).
556  *
557  * This implementation is a dummy for architectures which require no
558  * specific state.
559  */
560 struct arch_elf_state {
561 };
562 
563 #define INIT_ARCH_ELF_STATE {}
564 
565 /**
566  * arch_elf_pt_proc() - check a PT_LOPROC..PT_HIPROC ELF program header
567  * @ehdr:	The main ELF header
568  * @phdr:	The program header to check
569  * @elf:	The open ELF file
570  * @is_interp:	True if the phdr is from the interpreter of the ELF being
571  *		loaded, else false.
572  * @state:	Architecture-specific state preserved throughout the process
573  *		of loading the ELF.
574  *
575  * Inspects the program header phdr to validate its correctness and/or
576  * suitability for the system. Called once per ELF program header in the
577  * range PT_LOPROC to PT_HIPROC, for both the ELF being loaded and its
578  * interpreter.
579  *
580  * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
581  *         with that return code.
582  */
583 static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
584 				   struct elf_phdr *phdr,
585 				   struct file *elf, bool is_interp,
586 				   struct arch_elf_state *state)
587 {
588 	/* Dummy implementation, always proceed */
589 	return 0;
590 }
591 
592 /**
593  * arch_check_elf() - check an ELF executable
594  * @ehdr:	The main ELF header
595  * @has_interp:	True if the ELF has an interpreter, else false.
596  * @interp_ehdr: The interpreter's ELF header
597  * @state:	Architecture-specific state preserved throughout the process
598  *		of loading the ELF.
599  *
600  * Provides a final opportunity for architecture code to reject the loading
601  * of the ELF & cause an exec syscall to return an error. This is called after
602  * all program headers to be checked by arch_elf_pt_proc have been.
603  *
604  * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
605  *         with that return code.
606  */
607 static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
608 				 struct elfhdr *interp_ehdr,
609 				 struct arch_elf_state *state)
610 {
611 	/* Dummy implementation, always proceed */
612 	return 0;
613 }
614 
615 #endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */
616 
617 static inline int make_prot(u32 p_flags, struct arch_elf_state *arch_state,
618 			    bool has_interp, bool is_interp)
619 {
620 	int prot = 0;
621 
622 	if (p_flags & PF_R)
623 		prot |= PROT_READ;
624 	if (p_flags & PF_W)
625 		prot |= PROT_WRITE;
626 	if (p_flags & PF_X)
627 		prot |= PROT_EXEC;
628 
629 	return arch_elf_adjust_prot(prot, arch_state, has_interp, is_interp);
630 }
631 
632 /* This is much more generalized than the library routine read function,
633    so we keep this separate.  Technically the library read function
634    is only provided so that we can read a.out libraries that have
635    an ELF header. */
636 
637 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
638 		struct file *interpreter,
639 		unsigned long no_base, struct elf_phdr *interp_elf_phdata,
640 		struct arch_elf_state *arch_state)
641 {
642 	struct elf_phdr *eppnt;
643 	unsigned long load_addr = 0;
644 	int load_addr_set = 0;
645 	unsigned long error = ~0UL;
646 	unsigned long total_size;
647 	int i;
648 
649 	/* First of all, some simple consistency checks */
650 	if (interp_elf_ex->e_type != ET_EXEC &&
651 	    interp_elf_ex->e_type != ET_DYN)
652 		goto out;
653 	if (!elf_check_arch(interp_elf_ex) ||
654 	    elf_check_fdpic(interp_elf_ex))
655 		goto out;
656 	if (!interpreter->f_op->mmap)
657 		goto out;
658 
659 	total_size = total_mapping_size(interp_elf_phdata,
660 					interp_elf_ex->e_phnum);
661 	if (!total_size) {
662 		error = -EINVAL;
663 		goto out;
664 	}
665 
666 	eppnt = interp_elf_phdata;
667 	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
668 		if (eppnt->p_type == PT_LOAD) {
669 			int elf_type = MAP_PRIVATE;
670 			int elf_prot = make_prot(eppnt->p_flags, arch_state,
671 						 true, true);
672 			unsigned long vaddr = 0;
673 			unsigned long k, map_addr;
674 
675 			vaddr = eppnt->p_vaddr;
676 			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
677 				elf_type |= MAP_FIXED;
678 			else if (no_base && interp_elf_ex->e_type == ET_DYN)
679 				load_addr = -vaddr;
680 
681 			map_addr = elf_load(interpreter, load_addr + vaddr,
682 					eppnt, elf_prot, elf_type, total_size);
683 			total_size = 0;
684 			error = map_addr;
685 			if (BAD_ADDR(map_addr))
686 				goto out;
687 
688 			if (!load_addr_set &&
689 			    interp_elf_ex->e_type == ET_DYN) {
690 				load_addr = map_addr - ELF_PAGESTART(vaddr);
691 				load_addr_set = 1;
692 			}
693 
694 			/*
695 			 * Check to see if the section's size will overflow the
696 			 * allowed task size. Note that p_filesz must always be
697 			 * <= p_memsz so it's only necessary to check p_memsz.
698 			 */
699 			k = load_addr + eppnt->p_vaddr;
700 			if (BAD_ADDR(k) ||
701 			    eppnt->p_filesz > eppnt->p_memsz ||
702 			    eppnt->p_memsz > TASK_SIZE ||
703 			    TASK_SIZE - eppnt->p_memsz < k) {
704 				error = -ENOMEM;
705 				goto out;
706 			}
707 		}
708 	}
709 
710 	error = load_addr;
711 out:
712 	return error;
713 }
714 
715 /*
716  * These are the functions used to load ELF style executables and shared
717  * libraries.  There is no binary dependent code anywhere else.
718  */
719 
720 static int parse_elf_property(const char *data, size_t *off, size_t datasz,
721 			      struct arch_elf_state *arch,
722 			      bool have_prev_type, u32 *prev_type)
723 {
724 	size_t o, step;
725 	const struct gnu_property *pr;
726 	int ret;
727 
728 	if (*off == datasz)
729 		return -ENOENT;
730 
731 	if (WARN_ON_ONCE(*off > datasz || *off % ELF_GNU_PROPERTY_ALIGN))
732 		return -EIO;
733 	o = *off;
734 	datasz -= *off;
735 
736 	if (datasz < sizeof(*pr))
737 		return -ENOEXEC;
738 	pr = (const struct gnu_property *)(data + o);
739 	o += sizeof(*pr);
740 	datasz -= sizeof(*pr);
741 
742 	if (pr->pr_datasz > datasz)
743 		return -ENOEXEC;
744 
745 	WARN_ON_ONCE(o % ELF_GNU_PROPERTY_ALIGN);
746 	step = round_up(pr->pr_datasz, ELF_GNU_PROPERTY_ALIGN);
747 	if (step > datasz)
748 		return -ENOEXEC;
749 
750 	/* Properties are supposed to be unique and sorted on pr_type: */
751 	if (have_prev_type && pr->pr_type <= *prev_type)
752 		return -ENOEXEC;
753 	*prev_type = pr->pr_type;
754 
755 	ret = arch_parse_elf_property(pr->pr_type, data + o,
756 				      pr->pr_datasz, ELF_COMPAT, arch);
757 	if (ret)
758 		return ret;
759 
760 	*off = o + step;
761 	return 0;
762 }
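/*
 * Each entry in the PT_GNU_PROPERTY note's descriptor is a struct
 * gnu_property header (u32 pr_type, u32 pr_datasz) followed by pr_datasz
 * bytes of data padded to ELF_GNU_PROPERTY_ALIGN (8 bytes for 64-bit ELF),
 * and entries must appear sorted by increasing pr_type.
 */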
763 
764 #define NOTE_DATA_SZ SZ_1K
765 #define NOTE_NAME_SZ (sizeof(NN_GNU_PROPERTY_TYPE_0))
766 
767 static int parse_elf_properties(struct file *f, const struct elf_phdr *phdr,
768 				struct arch_elf_state *arch)
769 {
770 	union {
771 		struct elf_note nhdr;
772 		char data[NOTE_DATA_SZ];
773 	} note;
774 	loff_t pos;
775 	ssize_t n;
776 	size_t off, datasz;
777 	int ret;
778 	bool have_prev_type;
779 	u32 prev_type;
780 
781 	if (!IS_ENABLED(CONFIG_ARCH_USE_GNU_PROPERTY) || !phdr)
782 		return 0;
783 
784 	/* load_elf_binary() shouldn't call us unless this is true... */
785 	if (WARN_ON_ONCE(phdr->p_type != PT_GNU_PROPERTY))
786 		return -ENOEXEC;
787 
788 	/* If the properties are crazy large, that's too bad (for now): */
789 	if (phdr->p_filesz > sizeof(note))
790 		return -ENOEXEC;
791 
792 	pos = phdr->p_offset;
793 	n = kernel_read(f, &note, phdr->p_filesz, &pos);
794 
795 	BUILD_BUG_ON(sizeof(note) < sizeof(note.nhdr) + NOTE_NAME_SZ);
796 	if (n < 0 || n < sizeof(note.nhdr) + NOTE_NAME_SZ)
797 		return -EIO;
798 
799 	if (note.nhdr.n_type != NT_GNU_PROPERTY_TYPE_0 ||
800 	    note.nhdr.n_namesz != NOTE_NAME_SZ ||
801 	    strncmp(note.data + sizeof(note.nhdr),
802 		    NN_GNU_PROPERTY_TYPE_0, n - sizeof(note.nhdr)))
803 		return -ENOEXEC;
804 
805 	off = round_up(sizeof(note.nhdr) + NOTE_NAME_SZ,
806 		       ELF_GNU_PROPERTY_ALIGN);
807 	if (off > n)
808 		return -ENOEXEC;
809 
810 	if (note.nhdr.n_descsz > n - off)
811 		return -ENOEXEC;
812 	datasz = off + note.nhdr.n_descsz;
813 
814 	have_prev_type = false;
815 	do {
816 		ret = parse_elf_property(note.data, &off, datasz, arch,
817 					 have_prev_type, &prev_type);
818 		have_prev_type = true;
819 	} while (!ret);
820 
821 	return ret == -ENOENT ? 0 : ret;
822 }
823 
824 static int load_elf_binary(struct linux_binprm *bprm)
825 {
826 	struct file *interpreter = NULL; /* to shut gcc up */
827 	unsigned long load_bias = 0, phdr_addr = 0;
828 	int first_pt_load = 1;
829 	unsigned long error;
830 	struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
831 	struct elf_phdr *elf_property_phdata = NULL;
832 	unsigned long elf_brk;
833 	int retval, i;
834 	unsigned long elf_entry;
835 	unsigned long e_entry;
836 	unsigned long interp_load_addr = 0;
837 	unsigned long start_code, end_code, start_data, end_data;
838 	unsigned long reloc_func_desc __maybe_unused = 0;
839 	int executable_stack = EXSTACK_DEFAULT;
840 	struct elfhdr *elf_ex = (struct elfhdr *)bprm->buf;
841 	struct elfhdr *interp_elf_ex = NULL;
842 	struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
843 	struct mm_struct *mm;
844 	struct pt_regs *regs;
845 
846 	retval = -ENOEXEC;
847 	/* First of all, some simple consistency checks */
848 	if (memcmp(elf_ex->e_ident, ELFMAG, SELFMAG) != 0)
849 		goto out;
850 
851 	if (elf_ex->e_type != ET_EXEC && elf_ex->e_type != ET_DYN)
852 		goto out;
853 	if (!elf_check_arch(elf_ex))
854 		goto out;
855 	if (elf_check_fdpic(elf_ex))
856 		goto out;
857 	if (!bprm->file->f_op->mmap)
858 		goto out;
859 
860 	elf_phdata = load_elf_phdrs(elf_ex, bprm->file);
861 	if (!elf_phdata)
862 		goto out;
863 
864 	elf_ppnt = elf_phdata;
865 	for (i = 0; i < elf_ex->e_phnum; i++, elf_ppnt++) {
866 		char *elf_interpreter;
867 
868 		if (elf_ppnt->p_type == PT_GNU_PROPERTY) {
869 			elf_property_phdata = elf_ppnt;
870 			continue;
871 		}
872 
873 		if (elf_ppnt->p_type != PT_INTERP)
874 			continue;
875 
876 		/*
877 		 * This is the PT_INTERP entry: the pathname of the program
878 		 * interpreter (the dynamic linker) used for shared libraries.
879 		 */
880 		retval = -ENOEXEC;
881 		if (elf_ppnt->p_filesz > PATH_MAX || elf_ppnt->p_filesz < 2)
882 			goto out_free_ph;
883 
884 		retval = -ENOMEM;
885 		elf_interpreter = kmalloc(elf_ppnt->p_filesz, GFP_KERNEL);
886 		if (!elf_interpreter)
887 			goto out_free_ph;
888 
889 		retval = elf_read(bprm->file, elf_interpreter, elf_ppnt->p_filesz,
890 				  elf_ppnt->p_offset);
891 		if (retval < 0)
892 			goto out_free_interp;
893 		/* make sure path is NUL terminated */
894 		retval = -ENOEXEC;
895 		if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
896 			goto out_free_interp;
897 
898 		interpreter = open_exec(elf_interpreter);
899 		kfree(elf_interpreter);
900 		retval = PTR_ERR(interpreter);
901 		if (IS_ERR(interpreter))
902 			goto out_free_ph;
903 
904 		/*
905 		 * If the binary is not readable then enforce mm->dumpable = 0
906 		 * regardless of the interpreter's permissions.
907 		 */
908 		would_dump(bprm, interpreter);
909 
910 		interp_elf_ex = kmalloc(sizeof(*interp_elf_ex), GFP_KERNEL);
911 		if (!interp_elf_ex) {
912 			retval = -ENOMEM;
913 			goto out_free_file;
914 		}
915 
916 		/* Get the exec headers */
917 		retval = elf_read(interpreter, interp_elf_ex,
918 				  sizeof(*interp_elf_ex), 0);
919 		if (retval < 0)
920 			goto out_free_dentry;
921 
922 		break;
923 
924 out_free_interp:
925 		kfree(elf_interpreter);
926 		goto out_free_ph;
927 	}
928 
929 	elf_ppnt = elf_phdata;
930 	for (i = 0; i < elf_ex->e_phnum; i++, elf_ppnt++)
931 		switch (elf_ppnt->p_type) {
932 		case PT_GNU_STACK:
933 			if (elf_ppnt->p_flags & PF_X)
934 				executable_stack = EXSTACK_ENABLE_X;
935 			else
936 				executable_stack = EXSTACK_DISABLE_X;
937 			break;
938 
939 		case PT_LOPROC ... PT_HIPROC:
940 			retval = arch_elf_pt_proc(elf_ex, elf_ppnt,
941 						  bprm->file, false,
942 						  &arch_state);
943 			if (retval)
944 				goto out_free_dentry;
945 			break;
946 		}
947 
948 	/* Some simple consistency checks for the interpreter */
949 	if (interpreter) {
950 		retval = -ELIBBAD;
951 		/* Not an ELF interpreter */
952 		if (memcmp(interp_elf_ex->e_ident, ELFMAG, SELFMAG) != 0)
953 			goto out_free_dentry;
954 		/* Verify the interpreter has a valid arch */
955 		if (!elf_check_arch(interp_elf_ex) ||
956 		    elf_check_fdpic(interp_elf_ex))
957 			goto out_free_dentry;
958 
959 		/* Load the interpreter program headers */
960 		interp_elf_phdata = load_elf_phdrs(interp_elf_ex,
961 						   interpreter);
962 		if (!interp_elf_phdata)
963 			goto out_free_dentry;
964 
965 		/* Pass PT_LOPROC..PT_HIPROC headers to arch code */
966 		elf_property_phdata = NULL;
967 		elf_ppnt = interp_elf_phdata;
968 		for (i = 0; i < interp_elf_ex->e_phnum; i++, elf_ppnt++)
969 			switch (elf_ppnt->p_type) {
970 			case PT_GNU_PROPERTY:
971 				elf_property_phdata = elf_ppnt;
972 				break;
973 
974 			case PT_LOPROC ... PT_HIPROC:
975 				retval = arch_elf_pt_proc(interp_elf_ex,
976 							  elf_ppnt, interpreter,
977 							  true, &arch_state);
978 				if (retval)
979 					goto out_free_dentry;
980 				break;
981 			}
982 	}
983 
984 	retval = parse_elf_properties(interpreter ?: bprm->file,
985 				      elf_property_phdata, &arch_state);
986 	if (retval)
987 		goto out_free_dentry;
988 
989 	/*
990 	 * Allow arch code to reject the ELF at this point, whilst it's
991 	 * still possible to return an error to the code that invoked
992 	 * the exec syscall.
993 	 */
994 	retval = arch_check_elf(elf_ex,
995 				!!interpreter, interp_elf_ex,
996 				&arch_state);
997 	if (retval)
998 		goto out_free_dentry;
999 
1000 	/* Flush all traces of the currently running executable */
1001 	retval = begin_new_exec(bprm);
1002 	if (retval)
1003 		goto out_free_dentry;
1004 
1005 	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
1006 	   may depend on the personality.  */
1007 	SET_PERSONALITY2(*elf_ex, &arch_state);
1008 	if (elf_read_implies_exec(*elf_ex, executable_stack))
1009 		current->personality |= READ_IMPLIES_EXEC;
1010 
1011 	const int snapshot_randomize_va_space = READ_ONCE(randomize_va_space);
1012 	if (!(current->personality & ADDR_NO_RANDOMIZE) && snapshot_randomize_va_space)
1013 		current->flags |= PF_RANDOMIZE;
1014 
1015 	setup_new_exec(bprm);
1016 
1017 	/* Do this so that we can load the interpreter, if need be.  We will
1018 	   change some of these later */
1019 	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
1020 				 executable_stack);
1021 	if (retval < 0)
1022 		goto out_free_dentry;
1023 
1024 	elf_brk = 0;
1025 
1026 	start_code = ~0UL;
1027 	end_code = 0;
1028 	start_data = 0;
1029 	end_data = 0;
1030 
1031 	/* Now we do a little grungy work by mmapping the ELF image into
1032 	   the correct location in memory. */
1033 	for(i = 0, elf_ppnt = elf_phdata;
1034 	    i < elf_ex->e_phnum; i++, elf_ppnt++) {
1035 		int elf_prot, elf_flags;
1036 		unsigned long k, vaddr;
1037 		unsigned long total_size = 0;
1038 		unsigned long alignment;
1039 
1040 		if (elf_ppnt->p_type != PT_LOAD)
1041 			continue;
1042 
1043 		elf_prot = make_prot(elf_ppnt->p_flags, &arch_state,
1044 				     !!interpreter, false);
1045 
1046 		elf_flags = MAP_PRIVATE;
1047 
1048 		vaddr = elf_ppnt->p_vaddr;
1049 		/*
1050 		 * The first time through the loop, first_pt_load is true:
1051 		 * layout will be calculated. Once set, use MAP_FIXED since
1052 		 * we know we've already safely mapped the entire region with
1053 		 * MAP_FIXED_NOREPLACE in the once-per-binary logic following.
1054 		 */
1055 		if (!first_pt_load) {
1056 			elf_flags |= MAP_FIXED;
1057 		} else if (elf_ex->e_type == ET_EXEC) {
1058 			/*
1059 			 * This logic is run once for the first LOAD Program
1060 			 * Header for ET_EXEC binaries. No special handling
1061 			 * is needed.
1062 			 */
1063 			elf_flags |= MAP_FIXED_NOREPLACE;
1064 		} else if (elf_ex->e_type == ET_DYN) {
1065 			/*
1066 			 * This logic is run once for the first LOAD Program
1067 			 * Header for ET_DYN binaries to calculate the
1068 			 * randomization (load_bias) for all the LOAD
1069 			 * Program Headers.
1070 			 */
1071 
1072 			/*
1073 			 * Calculate the entire size of the ELF mapping
1074 			 * (total_size), used for the initial mapping,
1075 			 * because it is only needed for the first PT_LOAD:
1076 			 * first_pt_load is cleared once that mapping is performed.
1077 			 *
1078 			 * Note that this is only sensible when the LOAD
1079 			 * segments are contiguous (or overlapping). If
1080 			 * used for LOADs that are far apart, this would
1081 			 * cause the holes between LOADs to be mapped,
1082 			 * running the risk of having the mapping fail,
1083 			 * as it would be larger than the ELF file itself.
1084 			 *
1085 			 * As a result, only ET_DYN does this, since
1086 			 * some ET_EXEC (e.g. ia64) may have large virtual
1087 			 * memory holes between LOADs.
1088 			 *
1089 			 */
1090 			total_size = total_mapping_size(elf_phdata,
1091 							elf_ex->e_phnum);
1092 			if (!total_size) {
1093 				retval = -EINVAL;
1094 				goto out_free_dentry;
1095 			}
1096 
1097 			/* Calculate any requested alignment. */
1098 			alignment = maximum_alignment(elf_phdata, elf_ex->e_phnum);
1099 
1100 			/*
1101 			 * There are effectively two types of ET_DYN
1102 			 * binaries: programs (i.e. PIE: ET_DYN with PT_INTERP)
1103 			 * and loaders (ET_DYN without PT_INTERP, since they
1104 			 * _are_ the ELF interpreter). The loaders must
1105 			 * be loaded away from programs since the program
1106 			 * may otherwise collide with the loader (especially
1107 			 * for ET_EXEC which does not have a randomized
1108 			 * position). For example to handle invocations of
1109 			 * "./ld.so someprog" to test out a new version of
1110 			 * the loader, the subsequent program that the
1111 			 * loader loads must avoid the loader itself, so
1112 			 * they cannot share the same load range. Sufficient
1113 			 * room for the brk must be allocated with the
1114 			 * loader as well, since brk must be available with
1115 			 * the loader.
1116 			 *
1117 			 * Therefore, programs are loaded offset from
1118 			 * ELF_ET_DYN_BASE and loaders are loaded into the
1119 			 * independently randomized mmap region (0 load_bias
1120 			 * without MAP_FIXED nor MAP_FIXED_NOREPLACE).
1121 			 */
1122 			if (interpreter) {
1123 				/* On ET_DYN with PT_INTERP, we do the ASLR. */
1124 				load_bias = ELF_ET_DYN_BASE;
1125 				if (current->flags & PF_RANDOMIZE)
1126 					load_bias += arch_mmap_rnd();
1127 				/* Adjust alignment as requested. */
1128 				if (alignment)
1129 					load_bias &= ~(alignment - 1);
1130 				elf_flags |= MAP_FIXED_NOREPLACE;
1131 			} else {
1132 				/*
1133 				 * For ET_DYN without PT_INTERP, we rely on
1134 				 * the architecture's (potentially ASLR) mmap
1135 				 * base address (via a load_bias of 0).
1136 				 *
1137 				 * When a large alignment is requested, we
1138 				 * must do the allocation at address "0" right
1139 				 * now to discover where things will load so
1140 				 * that we can adjust the resulting alignment.
1141 				 * In this case (load_bias != 0), we can use
1142 				 * MAP_FIXED_NOREPLACE to make sure the mapping
1143 				 * doesn't collide with anything.
1144 				 */
1145 				if (alignment > ELF_MIN_ALIGN) {
1146 					load_bias = elf_load(bprm->file, 0, elf_ppnt,
1147 							     elf_prot, elf_flags, total_size);
1148 					if (BAD_ADDR(load_bias)) {
1149 						retval = IS_ERR_VALUE(load_bias) ?
1150 							 PTR_ERR((void*)load_bias) : -EINVAL;
1151 						goto out_free_dentry;
1152 					}
1153 					vm_munmap(load_bias, total_size);
1154 					/* Adjust alignment as requested. */
1155 					if (alignment)
1156 						load_bias &= ~(alignment - 1);
1157 					elf_flags |= MAP_FIXED_NOREPLACE;
1158 				} else
1159 					load_bias = 0;
1160 			}
1161 
1162 			/*
1163 			 * Since load_bias is used for all subsequent loading
1164 			 * calculations, we must lower it by the first vaddr
1165 			 * so that the remaining calculations based on the
1166 			 * ELF vaddrs will be correctly offset. The result
1167 			 * is then page aligned.
1168 			 */
1169 			load_bias = ELF_PAGESTART(load_bias - vaddr);
1170 		}
1171 
1172 		error = elf_load(bprm->file, load_bias + vaddr, elf_ppnt,
1173 				elf_prot, elf_flags, total_size);
1174 		if (BAD_ADDR(error)) {
1175 			retval = IS_ERR_VALUE(error) ?
1176 				PTR_ERR((void*)error) : -EINVAL;
1177 			goto out_free_dentry;
1178 		}
1179 
1180 		if (first_pt_load) {
1181 			first_pt_load = 0;
1182 			if (elf_ex->e_type == ET_DYN) {
1183 				load_bias += error -
1184 				             ELF_PAGESTART(load_bias + vaddr);
1185 				reloc_func_desc = load_bias;
1186 			}
1187 		}
1188 
1189 		/*
1190 		 * Figure out which segment in the file contains the Program
1191 		 * Header table, and map to the associated memory address.
1192 		 */
1193 		if (elf_ppnt->p_offset <= elf_ex->e_phoff &&
1194 		    elf_ex->e_phoff < elf_ppnt->p_offset + elf_ppnt->p_filesz) {
1195 			phdr_addr = elf_ex->e_phoff - elf_ppnt->p_offset +
1196 				    elf_ppnt->p_vaddr;
1197 		}
1198 
1199 		k = elf_ppnt->p_vaddr;
1200 		if ((elf_ppnt->p_flags & PF_X) && k < start_code)
1201 			start_code = k;
1202 		if (start_data < k)
1203 			start_data = k;
1204 
1205 		/*
1206 		 * Check to see if the section's size will overflow the
1207 		 * allowed task size. Note that p_filesz must always be
1208 		 * <= p_memsz so it is only necessary to check p_memsz.
1209 		 */
1210 		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
1211 		    elf_ppnt->p_memsz > TASK_SIZE ||
1212 		    TASK_SIZE - elf_ppnt->p_memsz < k) {
1213 			/* Setting up the brk can never work. Avoid overflows. */
1214 			retval = -EINVAL;
1215 			goto out_free_dentry;
1216 		}
1217 
1218 		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
1219 
1220 		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
1221 			end_code = k;
1222 		if (end_data < k)
1223 			end_data = k;
1224 		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
1225 		if (k > elf_brk)
1226 			elf_brk = k;
1227 	}
1228 
1229 	e_entry = elf_ex->e_entry + load_bias;
1230 	phdr_addr += load_bias;
1231 	elf_brk += load_bias;
1232 	start_code += load_bias;
1233 	end_code += load_bias;
1234 	start_data += load_bias;
1235 	end_data += load_bias;
1236 
1237 	current->mm->start_brk = current->mm->brk = ELF_PAGEALIGN(elf_brk);
1238 
1239 	if (interpreter) {
1240 		elf_entry = load_elf_interp(interp_elf_ex,
1241 					    interpreter,
1242 					    load_bias, interp_elf_phdata,
1243 					    &arch_state);
1244 		if (!IS_ERR_VALUE(elf_entry)) {
1245 			/*
1246 			 * load_elf_interp() returns relocation
1247 			 * adjustment
1248 			 */
1249 			interp_load_addr = elf_entry;
1250 			elf_entry += interp_elf_ex->e_entry;
1251 		}
1252 		if (BAD_ADDR(elf_entry)) {
1253 			retval = IS_ERR_VALUE(elf_entry) ?
1254 					(int)elf_entry : -EINVAL;
1255 			goto out_free_dentry;
1256 		}
1257 		reloc_func_desc = interp_load_addr;
1258 
1259 		exe_file_allow_write_access(interpreter);
1260 		fput(interpreter);
1261 
1262 		kfree(interp_elf_ex);
1263 		kfree(interp_elf_phdata);
1264 	} else {
1265 		elf_entry = e_entry;
1266 		if (BAD_ADDR(elf_entry)) {
1267 			retval = -EINVAL;
1268 			goto out_free_dentry;
1269 		}
1270 	}
1271 
1272 	kfree(elf_phdata);
1273 
1274 	set_binfmt(&elf_format);
1275 
1276 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
1277 	retval = ARCH_SETUP_ADDITIONAL_PAGES(bprm, elf_ex, !!interpreter);
1278 	if (retval < 0)
1279 		goto out;
1280 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
1281 
1282 	retval = create_elf_tables(bprm, elf_ex, interp_load_addr,
1283 				   e_entry, phdr_addr);
1284 	if (retval < 0)
1285 		goto out;
1286 
1287 	mm = current->mm;
1288 	mm->end_code = end_code;
1289 	mm->start_code = start_code;
1290 	mm->start_data = start_data;
1291 	mm->end_data = end_data;
1292 	mm->start_stack = bprm->p;
1293 
1294 	if ((current->flags & PF_RANDOMIZE) && (snapshot_randomize_va_space > 1)) {
1295 		/*
1296 		 * For architectures with ELF randomization, when executing
1297 		 * a loader directly (i.e. no interpreter listed in ELF
1298 		 * headers), move the brk area out of the mmap region
1299 		 * (since it grows up, and may collide early with the stack
1300 		 * growing down), and into the unused ELF_ET_DYN_BASE region.
1301 		 */
1302 		if (IS_ENABLED(CONFIG_ARCH_HAS_ELF_RANDOMIZE) &&
1303 		    elf_ex->e_type == ET_DYN && !interpreter) {
1304 			mm->brk = mm->start_brk = ELF_ET_DYN_BASE;
1305 		} else {
1306 			/* Otherwise leave a gap between .bss and brk. */
1307 			mm->brk = mm->start_brk = mm->brk + PAGE_SIZE;
1308 		}
1309 
1310 		mm->brk = mm->start_brk = arch_randomize_brk(mm);
1311 #ifdef compat_brk_randomized
1312 		current->brk_randomized = 1;
1313 #endif
1314 	}
1315 
1316 	if (current->personality & MMAP_PAGE_ZERO) {
1317 		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
1318 		   and some applications "depend" upon this behavior.
1319 		   Since we do not have the power to recompile these, we
1320 		   emulate the SVr4 behavior. Sigh. */
1321 		error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
1322 				MAP_FIXED | MAP_PRIVATE, 0);
1323 
1324 		retval = do_mseal(0, PAGE_SIZE, 0);
1325 		if (retval)
1326 			pr_warn_ratelimited("pid=%d, couldn't seal address 0, ret=%d.\n",
1327 					    task_pid_nr(current), retval);
1328 	}
1329 
1330 	regs = current_pt_regs();
1331 #ifdef ELF_PLAT_INIT
1332 	/*
1333 	 * The ABI may specify that certain registers be set up in special
1334 	 * ways (on i386 %edx is the address of a DT_FINI function, for
1335 	 * example).  In addition, it may also specify (e.g. PowerPC64 ELF)
1336 	 * that the e_entry field is the address of the function descriptor
1337 	 * for the startup routine, rather than the address of the startup
1338 	 * routine itself.  This macro performs whatever initialization to
1339 	 * the regs structure is required as well as any relocations to the
1340 	 * function descriptor entries when executing dynamically linked apps.
1341 	 */
1342 	ELF_PLAT_INIT(regs, reloc_func_desc);
1343 #endif
1344 
1345 	finalize_exec(bprm);
1346 	START_THREAD(elf_ex, regs, elf_entry, bprm->p);
1347 	retval = 0;
1348 out:
1349 	return retval;
1350 
1351 	/* error cleanup */
1352 out_free_dentry:
1353 	kfree(interp_elf_ex);
1354 	kfree(interp_elf_phdata);
1355 out_free_file:
1356 	exe_file_allow_write_access(interpreter);
1357 	if (interpreter)
1358 		fput(interpreter);
1359 out_free_ph:
1360 	kfree(elf_phdata);
1361 	goto out;
1362 }
1363 
1364 #ifdef CONFIG_USELIB
1365 /* This is really simpleminded and specialized - we are loading an
1366    a.out library that is given an ELF header. */
1367 static int load_elf_library(struct file *file)
1368 {
1369 	struct elf_phdr *elf_phdata;
1370 	struct elf_phdr *eppnt;
1371 	int retval, error, i, j;
1372 	struct elfhdr elf_ex;
1373 
1374 	error = -ENOEXEC;
1375 	retval = elf_read(file, &elf_ex, sizeof(elf_ex), 0);
1376 	if (retval < 0)
1377 		goto out;
1378 
1379 	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1380 		goto out;
1381 
1382 	/* First of all, some simple consistency checks */
1383 	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1384 	    !elf_check_arch(&elf_ex) || !file->f_op->mmap)
1385 		goto out;
1386 	if (elf_check_fdpic(&elf_ex))
1387 		goto out;
1388 
1389 	/* Now read in all of the header information */
1390 
1391 	j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1392 	/* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1393 
1394 	error = -ENOMEM;
1395 	elf_phdata = kmalloc(j, GFP_KERNEL);
1396 	if (!elf_phdata)
1397 		goto out;
1398 
1399 	eppnt = elf_phdata;
1400 	error = -ENOEXEC;
1401 	retval = elf_read(file, eppnt, j, elf_ex.e_phoff);
1402 	if (retval < 0)
1403 		goto out_free_ph;
1404 
1405 	for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1406 		if ((eppnt + i)->p_type == PT_LOAD)
1407 			j++;
1408 	if (j != 1)
1409 		goto out_free_ph;
1410 
1411 	while (eppnt->p_type != PT_LOAD)
1412 		eppnt++;
1413 
1414 	/* Now use mmap to map the library into memory. */
1415 	error = elf_load(file, ELF_PAGESTART(eppnt->p_vaddr),
1416 			eppnt,
1417 			PROT_READ | PROT_WRITE | PROT_EXEC,
1418 			MAP_FIXED_NOREPLACE | MAP_PRIVATE,
1419 			0);
1420 
1421 	if (error != ELF_PAGESTART(eppnt->p_vaddr))
1422 		goto out_free_ph;
1423 
1424 	error = 0;
1425 
1426 out_free_ph:
1427 	kfree(elf_phdata);
1428 out:
1429 	return error;
1430 }
1431 #endif /* #ifdef CONFIG_USELIB */
1432 
1433 #ifdef CONFIG_ELF_CORE
1434 /*
1435  * ELF core dumper
1436  *
1437  * Modelled on fs/exec.c:aout_core_dump()
1438  * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1439  */
1440 
1441 /* An ELF note in memory */
1442 struct memelfnote
1443 {
1444 	const char *name;
1445 	int type;
1446 	unsigned int datasz;
1447 	void *data;
1448 };
1449 
1450 static int notesize(struct memelfnote *en)
1451 {
1452 	int sz;
1453 
1454 	sz = sizeof(struct elf_note);
1455 	sz += roundup(strlen(en->name) + 1, 4);
1456 	sz += roundup(en->datasz, 4);
1457 
1458 	return sz;
1459 }
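/*
 * Example: a note named "CORE" has a 12-byte header (sizeof(struct
 * elf_note)), its 5-byte name padded to 8, and the descriptor padded to a
 * multiple of 4; a descriptor of, say, 336 bytes therefore takes
 * 12 + 8 + 336 = 356 bytes in the core file.
 */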
1460 
1461 static int writenote(struct memelfnote *men, struct coredump_params *cprm)
1462 {
1463 	struct elf_note en;
1464 	en.n_namesz = strlen(men->name) + 1;
1465 	en.n_descsz = men->datasz;
1466 	en.n_type = men->type;
1467 
1468 	return dump_emit(cprm, &en, sizeof(en)) &&
1469 	    dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
1470 	    dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
1471 }
1472 
1473 static void fill_elf_header(struct elfhdr *elf, int segs,
1474 			    u16 machine, u32 flags)
1475 {
1476 	memset(elf, 0, sizeof(*elf));
1477 
1478 	memcpy(elf->e_ident, ELFMAG, SELFMAG);
1479 	elf->e_ident[EI_CLASS] = ELF_CLASS;
1480 	elf->e_ident[EI_DATA] = ELF_DATA;
1481 	elf->e_ident[EI_VERSION] = EV_CURRENT;
1482 	elf->e_ident[EI_OSABI] = ELF_OSABI;
1483 
1484 	elf->e_type = ET_CORE;
1485 	elf->e_machine = machine;
1486 	elf->e_version = EV_CURRENT;
1487 	elf->e_phoff = sizeof(struct elfhdr);
1488 	elf->e_flags = flags;
1489 	elf->e_ehsize = sizeof(struct elfhdr);
1490 	elf->e_phentsize = sizeof(struct elf_phdr);
1491 	elf->e_phnum = segs;
1492 }
1493 
1494 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1495 {
1496 	phdr->p_type = PT_NOTE;
1497 	phdr->p_offset = offset;
1498 	phdr->p_vaddr = 0;
1499 	phdr->p_paddr = 0;
1500 	phdr->p_filesz = sz;
1501 	phdr->p_memsz = 0;
1502 	phdr->p_flags = 0;
1503 	phdr->p_align = 4;
1504 }
1505 
1506 static void fill_note(struct memelfnote *note, const char *name, int type,
1507 		unsigned int sz, void *data)
1508 {
1509 	note->name = name;
1510 	note->type = type;
1511 	note->datasz = sz;
1512 	note->data = data;
1513 }
1514 
1515 /*
1516  * fill up all the fields in prstatus from the given task struct, except
1517  * registers which need to be filled up separately.
1518  */
1519 static void fill_prstatus(struct elf_prstatus_common *prstatus,
1520 		struct task_struct *p, long signr)
1521 {
1522 	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1523 	prstatus->pr_sigpend = p->pending.signal.sig[0];
1524 	prstatus->pr_sighold = p->blocked.sig[0];
1525 	rcu_read_lock();
1526 	prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1527 	rcu_read_unlock();
1528 	prstatus->pr_pid = task_pid_vnr(p);
1529 	prstatus->pr_pgrp = task_pgrp_vnr(p);
1530 	prstatus->pr_sid = task_session_vnr(p);
1531 	if (thread_group_leader(p)) {
1532 		struct task_cputime cputime;
1533 
1534 		/*
1535 		 * This is the record for the group leader.  It shows the
1536 		 * group-wide total, not its individual thread total.
1537 		 */
1538 		thread_group_cputime(p, &cputime);
1539 		prstatus->pr_utime = ns_to_kernel_old_timeval(cputime.utime);
1540 		prstatus->pr_stime = ns_to_kernel_old_timeval(cputime.stime);
1541 	} else {
1542 		u64 utime, stime;
1543 
1544 		task_cputime(p, &utime, &stime);
1545 		prstatus->pr_utime = ns_to_kernel_old_timeval(utime);
1546 		prstatus->pr_stime = ns_to_kernel_old_timeval(stime);
1547 	}
1548 
1549 	prstatus->pr_cutime = ns_to_kernel_old_timeval(p->signal->cutime);
1550 	prstatus->pr_cstime = ns_to_kernel_old_timeval(p->signal->cstime);
1551 }
1552 
1553 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1554 		       struct mm_struct *mm)
1555 {
1556 	const struct cred *cred;
1557 	unsigned int i, len;
1558 	unsigned int state;
1559 
1560 	/* first copy the parameters from user space */
1561 	memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1562 
1563 	len = mm->arg_end - mm->arg_start;
1564 	if (len >= ELF_PRARGSZ)
1565 		len = ELF_PRARGSZ-1;
1566 	if (copy_from_user(&psinfo->pr_psargs,
1567 		           (const char __user *)mm->arg_start, len))
1568 		return -EFAULT;
1569 	for(i = 0; i < len; i++)
1570 		if (psinfo->pr_psargs[i] == 0)
1571 			psinfo->pr_psargs[i] = ' ';
1572 	psinfo->pr_psargs[len] = 0;
1573 
1574 	rcu_read_lock();
1575 	psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1576 	rcu_read_unlock();
1577 	psinfo->pr_pid = task_pid_vnr(p);
1578 	psinfo->pr_pgrp = task_pgrp_vnr(p);
1579 	psinfo->pr_sid = task_session_vnr(p);
1580 
1581 	state = READ_ONCE(p->__state);
1582 	i = state ? ffz(~state) + 1 : 0;
1583 	psinfo->pr_state = i;
1584 	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1585 	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1586 	psinfo->pr_nice = task_nice(p);
1587 	psinfo->pr_flag = p->flags;
1588 	rcu_read_lock();
1589 	cred = __task_cred(p);
1590 	SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
1591 	SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
1592 	rcu_read_unlock();
1593 	get_task_comm(psinfo->pr_fname, p);
1594 
1595 	return 0;
1596 }
1597 
1598 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1599 {
1600 	elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1601 	int i = 0;
1602 	do
1603 		i += 2;
1604 	while (auxv[i - 2] != AT_NULL);
1605 	fill_note(note, NN_AUXV, NT_AUXV, i * sizeof(elf_addr_t), auxv);
1606 }
1607 
1608 static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1609 		const kernel_siginfo_t *siginfo)
1610 {
1611 	copy_siginfo_to_external(csigdata, siginfo);
1612 	fill_note(note, NN_SIGINFO, NT_SIGINFO, sizeof(*csigdata), csigdata);
1613 }
1614 
1615 /*
1616  * Format of NT_FILE note:
1617  *
1618  * long count     -- how many files are mapped
1619  * long page_size -- units for file_ofs
1620  * array of [COUNT] elements of
1621  *   long start
1622  *   long end
1623  *   long file_ofs
1624  * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
1625  */
1626 static int fill_files_note(struct memelfnote *note, struct coredump_params *cprm)
1627 {
1628 	unsigned count, size, names_ofs, remaining, n;
1629 	user_long_t *data;
1630 	user_long_t *start_end_ofs;
1631 	char *name_base, *name_curpos;
1632 	int i;
1633 
1634 	/* *Estimated* file count and total data size needed */
1635 	count = cprm->vma_count;
1636 	if (count > UINT_MAX / 64)
1637 		return -EINVAL;
1638 	size = count * 64;
1639 
1640 	names_ofs = (2 + 3 * count) * sizeof(data[0]);
1641  alloc:
1642 	/* paranoia check */
1643 	if (size >= core_file_note_size_limit) {
1644 		pr_warn_once("coredump Note size too large: %u (does kernel.core_file_note_size_limit sysctl need adjustment?)\n",
1645 			      size);
1646 		return -EINVAL;
1647 	}
1648 	size = round_up(size, PAGE_SIZE);
1649 	/*
1650 	 * "size" can be 0 here legitimately.
1651 	 * Let it ENOMEM and omit NT_FILE section which will be empty anyway.
1652 	 */
1653 	data = kvmalloc(size, GFP_KERNEL);
1654 	if (ZERO_OR_NULL_PTR(data))
1655 		return -ENOMEM;
1656 
1657 	start_end_ofs = data + 2;
1658 	name_base = name_curpos = ((char *)data) + names_ofs;
1659 	remaining = size - names_ofs;
1660 	count = 0;
1661 	for (i = 0; i < cprm->vma_count; i++) {
1662 		struct core_vma_metadata *m = &cprm->vma_meta[i];
1663 		struct file *file;
1664 		const char *filename;
1665 
1666 		file = m->file;
1667 		if (!file)
1668 			continue;
1669 		filename = file_path(file, name_curpos, remaining);
1670 		if (IS_ERR(filename)) {
1671 			if (PTR_ERR(filename) == -ENAMETOOLONG) {
1672 				kvfree(data);
1673 				size = size * 5 / 4;
1674 				goto alloc;
1675 			}
1676 			continue;
1677 		}
1678 
1679 		/* file_path() fills at the end, move name down */
1680 		/* n = strlen(filename) + 1: */
1681 		n = (name_curpos + remaining) - filename;
1682 		remaining = filename - name_curpos;
1683 		memmove(name_curpos, filename, n);
1684 		name_curpos += n;
1685 
1686 		*start_end_ofs++ = m->start;
1687 		*start_end_ofs++ = m->end;
1688 		*start_end_ofs++ = m->pgoff;
1689 		count++;
1690 	}
1691 
1692 	/* Now we know exact count of files, can store it */
1693 	data[0] = count;
1694 	data[1] = PAGE_SIZE;
1695 	/*
1696 	 * The count is usually less than mm->map_count,
1697 	 * so we need to move the filenames down.
1698 	 */
1699 	n = cprm->vma_count - count;
1700 	if (n != 0) {
1701 		unsigned shift_bytes = n * 3 * sizeof(data[0]);
1702 		memmove(name_base - shift_bytes, name_base,
1703 			name_curpos - name_base);
1704 		name_curpos -= shift_bytes;
1705 	}
1706 
1707 	size = name_curpos - (char *)data;
1708 	fill_note(note, NN_FILE, NT_FILE, size, data);
1709 	return 0;
1710 }
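/*
 * Example of the resulting NT_FILE payload for two mapped files (values
 * purely illustrative):
 *
 *   { 2, PAGE_SIZE,
 *     start0, end0, pgoff0,
 *     start1, end1, pgoff1 }
 *   "/lib/libfoo.so\0/usr/bin/bar\0"
 */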
1711 
1712 #include <linux/regset.h>
1713 
1714 struct elf_thread_core_info {
1715 	struct elf_thread_core_info *next;
1716 	struct task_struct *task;
1717 	struct elf_prstatus prstatus;
1718 	struct memelfnote notes[];
1719 };
1720 
1721 struct elf_note_info {
1722 	struct elf_thread_core_info *thread;
1723 	struct memelfnote psinfo;
1724 	struct memelfnote signote;
1725 	struct memelfnote auxv;
1726 	struct memelfnote files;
1727 	user_siginfo_t csigdata;
1728 	size_t size;
1729 	int thread_notes;
1730 };
1731 
1732 #ifdef CORE_DUMP_USE_REGSET
1733 /*
1734  * When a regset has a writeback hook, we call it on each thread before
1735  * dumping user memory.  On register window machines, this makes sure the
1736  * user memory backing the register data is up to date before we read it.
1737  */
1738 static void do_thread_regset_writeback(struct task_struct *task,
1739 				       const struct user_regset *regset)
1740 {
1741 	if (regset->writeback)
1742 		regset->writeback(task, regset, 1);
1743 }
1744 
1745 #ifndef PRSTATUS_SIZE
1746 #define PRSTATUS_SIZE sizeof(struct elf_prstatus)
1747 #endif
1748 
1749 #ifndef SET_PR_FPVALID
1750 #define SET_PR_FPVALID(S) ((S)->pr_fpvalid = 1)
1751 #endif
1752 
1753 static int fill_thread_core_info(struct elf_thread_core_info *t,
1754 				 const struct user_regset_view *view,
1755 				 long signr, struct elf_note_info *info)
1756 {
1757 	unsigned int note_iter, view_iter;
1758 
1759 	/*
1760 	 * NT_PRSTATUS is the one special case, because the regset data
1761 	 * goes into the pr_reg field inside the note contents, rather
1762 	 * than being the whole note contents.  We fill the regset in here.
1763 	 * We assume that regset 0 is NT_PRSTATUS.
1764 	 */
1765 	fill_prstatus(&t->prstatus.common, t->task, signr);
1766 	regset_get(t->task, &view->regsets[0],
1767 		   sizeof(t->prstatus.pr_reg), &t->prstatus.pr_reg);
1768 
1769 	fill_note(&t->notes[0], NN_PRSTATUS, NT_PRSTATUS,
1770 		  PRSTATUS_SIZE, &t->prstatus);
1771 	info->size += notesize(&t->notes[0]);
1772 
1773 	do_thread_regset_writeback(t->task, &view->regsets[0]);
1774 
1775 	/*
1776 	 * Each other regset might generate a note too.  For each regset
1777 	 * that has no core_note_type or is inactive, skip it.
1778 	 */
1779 	note_iter = 1;
1780 	for (view_iter = 1; view_iter < view->n; ++view_iter) {
1781 		const struct user_regset *regset = &view->regsets[view_iter];
1782 		int note_type = regset->core_note_type;
1783 		bool is_fpreg = note_type == NT_PRFPREG;
1784 		void *data;
1785 		int ret;
1786 
1787 		do_thread_regset_writeback(t->task, regset);
1788 		if (!note_type) // not for coredumps
1789 			continue;
1790 		if (regset->active && regset->active(t->task, regset) <= 0)
1791 			continue;
1792 
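		/*
		 * Fetch the regset contents into a freshly allocated buffer;
		 * on success it becomes the note payload and is freed later
		 * by free_note_info().
		 */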
1793 		ret = regset_get_alloc(t->task, regset, ~0U, &data);
1794 		if (ret < 0)
1795 			continue;
1796 
1797 		if (WARN_ON_ONCE(note_iter >= info->thread_notes))
1798 			break;
1799 
1800 		if (is_fpreg)
1801 			SET_PR_FPVALID(&t->prstatus);
1802 
1803 		fill_note(&t->notes[note_iter], is_fpreg ? NN_PRFPREG : "LINUX",
1804 			  note_type, ret, data);
1805 
1806 		info->size += notesize(&t->notes[note_iter]);
1807 		note_iter++;
1808 	}
1809 
1810 	return 1;
1811 }
1812 #else
1813 static int fill_thread_core_info(struct elf_thread_core_info *t,
1814 				 const struct user_regset_view *view,
1815 				 long signr, struct elf_note_info *info)
1816 {
1817 	struct task_struct *p = t->task;
1818 	elf_fpregset_t *fpu;
1819 
1820 	fill_prstatus(&t->prstatus.common, p, signr);
1821 	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1822 
1823 	fill_note(&t->notes[0], NN_PRSTATUS, NT_PRSTATUS, sizeof(t->prstatus),
1824 		  &(t->prstatus));
1825 	info->size += notesize(&t->notes[0]);
1826 
1827 	fpu = kzalloc(sizeof(elf_fpregset_t), GFP_KERNEL);
1828 	if (!fpu || !elf_core_copy_task_fpregs(p, fpu)) {
1829 		kfree(fpu);
1830 		return 1;
1831 	}
1832 
1833 	t->prstatus.pr_fpvalid = 1;
1834 	fill_note(&t->notes[1], NN_PRFPREG, NT_PRFPREG, sizeof(*fpu), fpu);
1835 	info->size += notesize(&t->notes[1]);
1836 
1837 	return 1;
1838 }
1839 #endif
1840 
1841 static int fill_note_info(struct elfhdr *elf, int phdrs,
1842 			  struct elf_note_info *info,
1843 			  struct coredump_params *cprm)
1844 {
1845 	struct task_struct *dump_task = current;
1846 	const struct user_regset_view *view;
1847 	struct elf_thread_core_info *t;
1848 	struct elf_prpsinfo *psinfo;
1849 	struct core_thread *ct;
1850 
1851 	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1852 	if (!psinfo)
1853 		return 0;
1854 	fill_note(&info->psinfo, NN_PRPSINFO, NT_PRPSINFO, sizeof(*psinfo), psinfo);
1855 
1856 #ifdef CORE_DUMP_USE_REGSET
1857 	view = task_user_regset_view(dump_task);
1858 
1859 	/*
1860 	 * Figure out how many notes we're going to need for each thread.
1861 	 */
1862 	info->thread_notes = 0;
1863 	for (int i = 0; i < view->n; ++i)
1864 		if (view->regsets[i].core_note_type != 0)
1865 			++info->thread_notes;
1866 
1867 	/*
1868 	 * Sanity check.  We rely on regset 0 being NT_PRSTATUS,
1869 	 * since it is our one special case.
1870 	 */
1871 	if (unlikely(info->thread_notes == 0) ||
1872 	    unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1873 		WARN_ON(1);
1874 		return 0;
1875 	}
1876 
1877 	/*
1878 	 * Initialize the ELF file header.
1879 	 */
1880 	fill_elf_header(elf, phdrs,
1881 			view->e_machine, view->e_flags);
1882 #else
1883 	view = NULL;
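	/* Without regsets each thread gets NT_PRSTATUS plus NT_PRFPREG. */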
1884 	info->thread_notes = 2;
1885 	fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
1886 #endif
1887 
1888 	/*
1889 	 * Allocate a structure for each thread.
1890 	 */
1891 	info->thread = kzalloc(offsetof(struct elf_thread_core_info,
1892 				     notes[info->thread_notes]),
1893 			    GFP_KERNEL);
1894 	if (unlikely(!info->thread))
1895 		return 0;
1896 
1897 	info->thread->task = dump_task;
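	/*
	 * The other threads taking part in the dump are linked on the
	 * core_state dumper list; allocate a per-thread structure for each
	 * and chain it behind the dumping thread.
	 */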
1898 	for (ct = dump_task->signal->core_state->dumper.next; ct; ct = ct->next) {
1899 		t = kzalloc(offsetof(struct elf_thread_core_info,
1900 				     notes[info->thread_notes]),
1901 			    GFP_KERNEL);
1902 		if (unlikely(!t))
1903 			return 0;
1904 
1905 		t->task = ct->task;
1906 		t->next = info->thread->next;
1907 		info->thread->next = t;
1908 	}
1909 
1910 	/*
1911 	 * Now fill in each thread's information.
1912 	 */
1913 	for (t = info->thread; t != NULL; t = t->next)
1914 		if (!fill_thread_core_info(t, view, cprm->siginfo->si_signo, info))
1915 			return 0;
1916 
1917 	/*
1918 	 * Fill in the two process-wide notes.
1919 	 */
1920 	fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1921 	info->size += notesize(&info->psinfo);
1922 
1923 	fill_siginfo_note(&info->signote, &info->csigdata, cprm->siginfo);
1924 	info->size += notesize(&info->signote);
1925 
1926 	fill_auxv_note(&info->auxv, current->mm);
1927 	info->size += notesize(&info->auxv);
1928 
1929 	if (fill_files_note(&info->files, cprm) == 0)
1930 		info->size += notesize(&info->files);
1931 
1932 	return 1;
1933 }
1934 
1935 /*
1936  * Write all the notes for each thread.  When writing the first thread, the
1937  * process-wide notes are interleaved after the first thread-specific note.
1938  */
1939 static int write_note_info(struct elf_note_info *info,
1940 			   struct coredump_params *cprm)
1941 {
1942 	bool first = true;
1943 	struct elf_thread_core_info *t = info->thread;
1944 
1945 	do {
1946 		int i;
1947 
1948 		if (!writenote(&t->notes[0], cprm))
1949 			return 0;
1950 
1951 		if (first && !writenote(&info->psinfo, cprm))
1952 			return 0;
1953 		if (first && !writenote(&info->signote, cprm))
1954 			return 0;
1955 		if (first && !writenote(&info->auxv, cprm))
1956 			return 0;
1957 		if (first && info->files.data &&
1958 				!writenote(&info->files, cprm))
1959 			return 0;
1960 
1961 		for (i = 1; i < info->thread_notes; ++i)
1962 			if (t->notes[i].data &&
1963 			    !writenote(&t->notes[i], cprm))
1964 				return 0;
1965 
1966 		first = false;
1967 		t = t->next;
1968 	} while (t);
1969 
1970 	return 1;
1971 }
1972 
1973 static void free_note_info(struct elf_note_info *info)
1974 {
1975 	struct elf_thread_core_info *threads = info->thread;
1976 	while (threads) {
1977 		unsigned int i;
1978 		struct elf_thread_core_info *t = threads;
1979 		threads = t->next;
1980 		WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1981 		for (i = 1; i < info->thread_notes; ++i)
1982 			kvfree(t->notes[i].data);
1983 		kfree(t);
1984 	}
1985 	kfree(info->psinfo.data);
1986 	kvfree(info->files.data);
1987 }
1988 
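/*
 * When the real number of program headers exceeds PN_XNUM, e_phnum is set
 * to PN_XNUM and the real count is carried in sh_info of the first (and
 * only) section header, per the ELF extended-numbering convention.
 */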
1989 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
1990 			     elf_addr_t e_shoff, int segs)
1991 {
1992 	elf->e_shoff = e_shoff;
1993 	elf->e_shentsize = sizeof(*shdr4extnum);
1994 	elf->e_shnum = 1;
1995 	elf->e_shstrndx = SHN_UNDEF;
1996 
1997 	memset(shdr4extnum, 0, sizeof(*shdr4extnum));
1998 
1999 	shdr4extnum->sh_type = SHT_NULL;
2000 	shdr4extnum->sh_size = elf->e_shnum;
2001 	shdr4extnum->sh_link = elf->e_shstrndx;
2002 	shdr4extnum->sh_info = segs;
2003 }
2004 
2005 /*
2006  * Actual dumper
2007  *
2008  * This is a two-pass process; first we find the offsets of the bits,
2009  * and then they are actually written out.  If we run out of core limit
2010  * we just truncate.
2011  */
2012 static int elf_core_dump(struct coredump_params *cprm)
2013 {
2014 	int has_dumped = 0;
2015 	int segs, i;
2016 	struct elfhdr elf;
2017 	loff_t offset = 0, dataoff;
2018 	struct elf_note_info info = { };
2019 	struct elf_phdr *phdr4note = NULL;
2020 	struct elf_shdr *shdr4extnum = NULL;
2021 	Elf_Half e_phnum;
2022 	elf_addr_t e_shoff;
2023 
2024 	/*
2025 	 * The number of segs is recorded in the ELF header as a 16-bit value.
2026 	 * Please check the DEFAULT_MAX_MAP_COUNT definition when modifying this.
2027 	 */
2028 	segs = cprm->vma_count + elf_core_extra_phdrs(cprm);
2029 
2030 	/* for notes section */
2031 	segs++;
2032 
2033 	/* If segs > PN_XNUM (0xffff), then e_phnum overflows. To avoid
2034 	 * this, the kernel supports extended numbering. Have a look at
2035 	 * include/linux/elf.h for further information. */
2036 	e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2037 
2038 	/*
2039 	 * Collect all the non-memory information about the process for the
2040 	 * notes.  This also sets up the file header.
2041 	 */
2042 	if (!fill_note_info(&elf, e_phnum, &info, cprm))
2043 		goto end_coredump;
2044 
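	/*
	 * From this point on a failure only truncates the dump: has_dumped
	 * is still returned, so the partial core file is kept.
	 */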
2045 	has_dumped = 1;
2046 
2047 	offset += sizeof(elf);				/* ELF header */
2048 	offset += segs * sizeof(struct elf_phdr);	/* Program headers */
2049 
2050 	/* Write notes phdr entry */
2051 	{
2052 		size_t sz = info.size;
2053 
2054 		/* For cell spufs and x86 xstate */
2055 		sz += elf_coredump_extra_notes_size();
2056 
2057 		phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2058 		if (!phdr4note)
2059 			goto end_coredump;
2060 
2061 		fill_elf_note_phdr(phdr4note, sz, offset);
2062 		offset += sz;
2063 	}
2064 
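	/*
	 * The dumped segment data starts at the next ELF_EXEC_PAGESIZE
	 * boundary after the headers and notes.
	 */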
2065 	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2066 
2067 	offset += cprm->vma_data_size;
2068 	offset += elf_core_extra_data_size(cprm);
2069 	e_shoff = offset;
2070 
2071 	if (e_phnum == PN_XNUM) {
2072 		shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2073 		if (!shdr4extnum)
2074 			goto end_coredump;
2075 		fill_extnum_info(&elf, shdr4extnum, e_shoff, segs);
2076 	}
2077 
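	/*
	 * Rewind to the data start so the PT_LOAD p_offset values can be
	 * computed while the program headers are written out below.
	 */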
2078 	offset = dataoff;
2079 
2080 	if (!dump_emit(cprm, &elf, sizeof(elf)))
2081 		goto end_coredump;
2082 
2083 	if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
2084 		goto end_coredump;
2085 
2086 	/* Write program headers for segments dump */
2087 	for (i = 0; i < cprm->vma_count; i++) {
2088 		struct core_vma_metadata *meta = cprm->vma_meta + i;
2089 		struct elf_phdr phdr;
2090 
2091 		phdr.p_type = PT_LOAD;
2092 		phdr.p_offset = offset;
2093 		phdr.p_vaddr = meta->start;
2094 		phdr.p_paddr = 0;
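		/*
		 * p_filesz is how much is actually written; it may be smaller
		 * than p_memsz (or zero) when the VMA's contents are filtered
		 * out of the dump.
		 */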
2095 		phdr.p_filesz = meta->dump_size;
2096 		phdr.p_memsz = meta->end - meta->start;
2097 		offset += phdr.p_filesz;
2098 		phdr.p_flags = 0;
2099 		if (meta->flags & VM_READ)
2100 			phdr.p_flags |= PF_R;
2101 		if (meta->flags & VM_WRITE)
2102 			phdr.p_flags |= PF_W;
2103 		if (meta->flags & VM_EXEC)
2104 			phdr.p_flags |= PF_X;
2105 		phdr.p_align = ELF_EXEC_PAGESIZE;
2106 
2107 		if (!dump_emit(cprm, &phdr, sizeof(phdr)))
2108 			goto end_coredump;
2109 	}
2110 
2111 	if (!elf_core_write_extra_phdrs(cprm, offset))
2112 		goto end_coredump;
2113 
2114 	/* write out the notes section */
2115 	if (!write_note_info(&info, cprm))
2116 		goto end_coredump;
2117 
2118 	/* For cell spufs and x86 xstate */
2119 	if (elf_coredump_extra_notes_write(cprm))
2120 		goto end_coredump;
2121 
2122 	/* Align to page */
2123 	dump_skip_to(cprm, dataoff);
2124 
2125 	for (i = 0; i < cprm->vma_count; i++) {
2126 		struct core_vma_metadata *meta = cprm->vma_meta + i;
2127 
2128 		if (!dump_user_range(cprm, meta->start, meta->dump_size))
2129 			goto end_coredump;
2130 	}
2131 
2132 	if (!elf_core_write_extra_data(cprm))
2133 		goto end_coredump;
2134 
2135 	if (e_phnum == PN_XNUM) {
2136 		if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
2137 			goto end_coredump;
2138 	}
2139 
2140 end_coredump:
2141 	free_note_info(&info);
2142 	kfree(shdr4extnum);
2143 	kfree(phdr4note);
2144 	return has_dumped;
2145 }
2146 
2147 #endif		/* CONFIG_ELF_CORE */
2148 
2149 static int __init init_elf_binfmt(void)
2150 {
2151 	register_binfmt(&elf_format);
2152 	return 0;
2153 }
2154 
2155 static void __exit exit_elf_binfmt(void)
2156 {
2157 	/* Remove the ELF loader. */
2158 	unregister_binfmt(&elf_format);
2159 }
2160 
2161 core_initcall(init_elf_binfmt);
2162 module_exit(exit_elf_binfmt);
2163 
2164 #ifdef CONFIG_BINFMT_ELF_KUNIT_TEST
2165 #include "tests/binfmt_elf_kunit.c"
2166 #endif
2167