xref: /linux/fs/binfmt_elf.c (revision 24bce201d79807b668bf9d9e0aca801c5c0d5f78)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * linux/fs/binfmt_elf.c
4  *
5  * These are the functions used to load ELF format executables as used
6  * on SVr4 machines.  Information on the format may be found in the book
7  * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
8  * Tools".
9  *
10  * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
11  */
12 
13 #include <linux/module.h>
14 #include <linux/kernel.h>
15 #include <linux/fs.h>
16 #include <linux/log2.h>
17 #include <linux/mm.h>
18 #include <linux/mman.h>
19 #include <linux/errno.h>
20 #include <linux/signal.h>
21 #include <linux/binfmts.h>
22 #include <linux/string.h>
23 #include <linux/file.h>
24 #include <linux/slab.h>
25 #include <linux/personality.h>
26 #include <linux/elfcore.h>
27 #include <linux/init.h>
28 #include <linux/highuid.h>
29 #include <linux/compiler.h>
30 #include <linux/highmem.h>
31 #include <linux/hugetlb.h>
32 #include <linux/pagemap.h>
33 #include <linux/vmalloc.h>
34 #include <linux/security.h>
35 #include <linux/random.h>
36 #include <linux/elf.h>
37 #include <linux/elf-randomize.h>
38 #include <linux/utsname.h>
39 #include <linux/coredump.h>
40 #include <linux/sched.h>
41 #include <linux/sched/coredump.h>
42 #include <linux/sched/task_stack.h>
43 #include <linux/sched/cputime.h>
44 #include <linux/sizes.h>
45 #include <linux/types.h>
46 #include <linux/cred.h>
47 #include <linux/dax.h>
48 #include <linux/uaccess.h>
49 #include <asm/param.h>
50 #include <asm/page.h>
51 
52 #ifndef ELF_COMPAT
53 #define ELF_COMPAT 0
54 #endif
55 
56 #ifndef user_long_t
57 #define user_long_t long
58 #endif
59 #ifndef user_siginfo_t
60 #define user_siginfo_t siginfo_t
61 #endif
62 
63 /* That's for binfmt_elf_fdpic to deal with */
64 #ifndef elf_check_fdpic
65 #define elf_check_fdpic(ex) false
66 #endif
67 
68 static int load_elf_binary(struct linux_binprm *bprm);
69 
70 #ifdef CONFIG_USELIB
71 static int load_elf_library(struct file *);
72 #else
73 #define load_elf_library NULL
74 #endif
75 
76 /*
77  * If we don't support core dumping, then supply a NULL so we
78  * don't even try.
79  */
80 #ifdef CONFIG_ELF_CORE
81 static int elf_core_dump(struct coredump_params *cprm);
82 #else
83 #define elf_core_dump	NULL
84 #endif
85 
86 #if ELF_EXEC_PAGESIZE > PAGE_SIZE
87 #define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
88 #else
89 #define ELF_MIN_ALIGN	PAGE_SIZE
90 #endif
91 
92 #ifndef ELF_CORE_EFLAGS
93 #define ELF_CORE_EFLAGS	0
94 #endif
95 
/*
 * Helpers working in units of ELF_MIN_ALIGN (the larger of ELF_EXEC_PAGESIZE
 * and PAGE_SIZE, see above).  The masks assume ELF_MIN_ALIGN is a power of
 * two.
 *
 *   ELF_PAGESTART(_v)  - _v with the in-page offset bits cleared (round down)
 *   ELF_PAGEOFFSET(_v) - the in-page offset bits of _v
 *   ELF_PAGEALIGN(_v)  - _v rounded up to the next ELF_MIN_ALIGN boundary;
 *                        an already aligned value (including 0) is unchanged
 */
#define ELF_PAGESTART(_v) ((_v) & ~(int)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
99 
/*
 * Handler table describing the ELF binary format: the owning module, the
 * executable loader, the shared-library loader (load_elf_library is defined
 * to NULL above when CONFIG_USELIB is off) and, when CONFIG_COREDUMP is
 * enabled, the core dump writer together with the minimum core size.
 */
static struct linux_binfmt elf_format = {
	.module		= THIS_MODULE,
	.load_binary	= load_elf_binary,
	.load_shlib	= load_elf_library,
#ifdef CONFIG_COREDUMP
	.core_dump	= elf_core_dump,
	.min_coredump	= ELF_EXEC_PAGESIZE,
#endif
};
109 
/*
 * True (and hinted as unlikely) when x, viewed as an unsigned long, is not
 * below TASK_SIZE.  It is used in this file on the results of mapping calls;
 * NOTE(review): presumably negative error codes carried in an unsigned long
 * also compare >= TASK_SIZE and are caught by the same test - confirm.
 */
#define BAD_ADDR(x) (unlikely((unsigned long)(x) >= TASK_SIZE))
111 
112 static int set_brk(unsigned long start, unsigned long end, int prot)
113 {
114 	start = ELF_PAGEALIGN(start);
115 	end = ELF_PAGEALIGN(end);
116 	if (end > start) {
117 		/*
118 		 * Map the last of the bss segment.
119 		 * If the header is requesting these pages to be
120 		 * executable, honour that (ppc32 needs this).
121 		 */
122 		int error = vm_brk_flags(start, end - start,
123 				prot & PROT_EXEC ? VM_EXEC : 0);
124 		if (error)
125 			return error;
126 	}
127 	current->mm->start_brk = current->mm->brk = end;
128 	return 0;
129 }
130 
131 /* We need to explicitly zero any fractional pages
132    after the data section (i.e. bss).  This would
133    contain the junk from the file that should not
134    be in memory
135  */
136 static int padzero(unsigned long elf_bss)
137 {
138 	unsigned long nbyte;
139 
140 	nbyte = ELF_PAGEOFFSET(elf_bss);
141 	if (nbyte) {
142 		nbyte = ELF_MIN_ALIGN - nbyte;
143 		if (clear_user((void __user *) elf_bss, nbyte))
144 			return -EFAULT;
145 	}
146 	return 0;
147 }
148 
/*
 * Let's use some macros to make this stack manipulation a little clearer.
 *
 * STACK_ADD(sp, items)   - treat sp as an elf_addr_t __user pointer and step
 *                          it by items entries: towards higher addresses
 *                          with CONFIG_STACK_GROWSUP, lower otherwise.
 * STACK_ROUND(sp, items) - step sp by items in the same direction and align
 *                          the result to 16 bytes (rounding up when the
 *                          stack grows up, down otherwise).  Evaluates to an
 *                          unsigned long.
 * STACK_ALLOC(sp, len)   - reserve len on the stack by modifying sp in
 *                          place.  Evaluates to the lowest address of the
 *                          reserved area: the old sp when the stack grows
 *                          up, the updated sp otherwise.
 */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
	elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
	old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
	(((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) (sp -= len)
#endif
163 
164 #ifndef ELF_BASE_PLATFORM
165 /*
166  * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
167  * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
168  * will be copied to the user stack in the same manner as AT_PLATFORM.
169  */
170 #define ELF_BASE_PLATFORM NULL
171 #endif
172 
/*
 * create_elf_tables() - lay out the initial user stack of a new ELF image
 * @bprm:             exec state; supplies the current stack position (p),
 *                    argc/envc, the AT_EXECFN value (exec), interp_flags,
 *                    secureexec and the optional exec fd.  bprm->p is updated
 *                    to the final stack pointer (and bprm->exec is rewritten
 *                    on CONFIG_STACK_GROWSUP).
 * @exec:             ELF header of the executable; e_phnum goes to AT_PHNUM
 * @interp_load_addr: value stored in AT_BASE
 * @e_entry:          value stored in AT_ENTRY
 * @phdr_addr:        value stored in AT_PHDR
 *
 * Copies the platform strings and 16 random bytes to the user stack, builds
 * the auxiliary vector in mm->saved_auxv, and then writes, starting at the
 * lowest stack address: argc, the argv pointers, a NULL, the envp pointers,
 * a NULL and the auxiliary vector.  Also records mm->arg_end, mm->env_start
 * and mm->env_end while walking the argument/environment strings that start
 * at mm->arg_start.
 *
 * Return: 0 on success; -EFAULT when a user-space store fails or the stack
 * vma cannot be found/extended; -EINTR when taking the mmap lock is
 * interrupted; -EINVAL when an argument or environment string has a zero
 * length or exceeds MAX_ARG_STRLEN.
 */
static int
create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec,
		unsigned long interp_load_addr,
		unsigned long e_entry, unsigned long phdr_addr)
{
	struct mm_struct *mm = current->mm;
	unsigned long p = bprm->p;
	int argc = bprm->argc;
	int envc = bprm->envc;
	elf_addr_t __user *sp;
	elf_addr_t __user *u_platform;
	elf_addr_t __user *u_base_platform;
	elf_addr_t __user *u_rand_bytes;
	const char *k_platform = ELF_PLATFORM;
	const char *k_base_platform = ELF_BASE_PLATFORM;
	unsigned char k_rand_bytes[16];
	int items;
	elf_addr_t *elf_info;
	elf_addr_t flags = 0;
	int ei_index;
	const struct cred *cred = current_cred();
	struct vm_area_struct *vma;

	/*
	 * In some cases (e.g. Hyper-Threading), we want to avoid L1
	 * evictions by the processes running on the same package. One
	 * thing we can do is to shuffle the initial stack for them.
	 */

	p = arch_align_stack(p);

	/*
	 * If this architecture has a platform capability string, copy it
	 * to userspace.  In some cases (Sparc), this info is impossible
	 * for userspace to get any other way, in others (i386) it is
	 * merely difficult.
	 */
	u_platform = NULL;
	if (k_platform) {
		/* Include the terminating NUL in the copy. */
		size_t len = strlen(k_platform) + 1;

		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
		if (copy_to_user(u_platform, k_platform, len))
			return -EFAULT;
	}

	/*
	 * If this architecture has a "base" platform capability
	 * string, copy it to userspace.
	 */
	u_base_platform = NULL;
	if (k_base_platform) {
		size_t len = strlen(k_base_platform) + 1;

		u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
		if (copy_to_user(u_base_platform, k_base_platform, len))
			return -EFAULT;
	}

	/*
	 * Generate 16 random bytes for userspace PRNG seeding.
	 */
	get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
	u_rand_bytes = (elf_addr_t __user *)
		       STACK_ALLOC(p, sizeof(k_rand_bytes));
	if (copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
		return -EFAULT;

	/* Create the ELF interpreter info */
	elf_info = (elf_addr_t *)mm->saved_auxv;
	/* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
	/* Each entry is an (id, value) pair appended at elf_info. */
#define NEW_AUX_ENT(id, val) \
	do { \
		*elf_info++ = id; \
		*elf_info++ = val; \
	} while (0)

#ifdef ARCH_DLINFO
	/*
	 * ARCH_DLINFO must come first so PPC can do its special alignment of
	 * AUXV.
	 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
	 * ARCH_DLINFO changes
	 */
	ARCH_DLINFO;
#endif
	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
	NEW_AUX_ENT(AT_PHDR, phdr_addr);
	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
	NEW_AUX_ENT(AT_BASE, interp_load_addr);
	if (bprm->interp_flags & BINPRM_FLAGS_PRESERVE_ARGV0)
		flags |= AT_FLAGS_PRESERVE_ARGV0;
	NEW_AUX_ENT(AT_FLAGS, flags);
	NEW_AUX_ENT(AT_ENTRY, e_entry);
	/* Credentials are translated into the ids of the cred's user namespace. */
	NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
	NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
	NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
	NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
	NEW_AUX_ENT(AT_SECURE, bprm->secureexec);
	NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
#ifdef ELF_HWCAP2
	NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
#endif
	NEW_AUX_ENT(AT_EXECFN, bprm->exec);
	if (k_platform) {
		NEW_AUX_ENT(AT_PLATFORM,
			    (elf_addr_t)(unsigned long)u_platform);
	}
	if (k_base_platform) {
		NEW_AUX_ENT(AT_BASE_PLATFORM,
			    (elf_addr_t)(unsigned long)u_base_platform);
	}
	if (bprm->have_execfd) {
		NEW_AUX_ENT(AT_EXECFD, bprm->execfd);
	}
#undef NEW_AUX_ENT
	/* AT_NULL is zero; clear the rest too */
	memset(elf_info, 0, (char *)mm->saved_auxv +
			sizeof(mm->saved_auxv) - (char *)elf_info);

	/* And advance past the AT_NULL entry.  */
	elf_info += 2;

	/* Number of elf_addr_t slots the vector occupies, AT_NULL included. */
	ei_index = elf_info - (elf_addr_t *)mm->saved_auxv;
	sp = STACK_ADD(p, ei_index);

	/* argv pointers + NULL, envp pointers + NULL, plus the argc slot. */
	items = (argc + 1) + (envc + 1) + 1;
	bprm->p = STACK_ROUND(sp, items);

	/* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
	bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
	sp = (elf_addr_t __user *)bprm->p;
#endif


	/*
	 * Grow the stack manually; some architectures have a limit on how
	 * far ahead a user-space access may be in order to grow the stack.
	 */
	if (mmap_read_lock_killable(mm))
		return -EINTR;
	vma = find_extend_vma(mm, bprm->p);
	mmap_read_unlock(mm);
	if (!vma)
		return -EFAULT;

	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
	if (put_user(argc, sp++))
		return -EFAULT;

	/* Populate list of argv pointers back to argv strings. */
	p = mm->arg_end = mm->arg_start;
	while (argc-- > 0) {
		size_t len;
		if (put_user((elf_addr_t)p, sp++))
			return -EFAULT;
		/* Step p over this string to find where the next one starts. */
		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
		if (!len || len > MAX_ARG_STRLEN)
			return -EINVAL;
		p += len;
	}
	/* NULL terminator of the argv array. */
	if (put_user(0, sp++))
		return -EFAULT;
	mm->arg_end = p;

	/* Populate list of envp pointers back to envp strings. */
	mm->env_end = mm->env_start = p;
	while (envc-- > 0) {
		size_t len;
		if (put_user((elf_addr_t)p, sp++))
			return -EFAULT;
		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
		if (!len || len > MAX_ARG_STRLEN)
			return -EINVAL;
		p += len;
	}
	/* NULL terminator of the envp array. */
	if (put_user(0, sp++))
		return -EFAULT;
	mm->env_end = p;

	/* Put the elf_info on the stack in the right place.  */
	if (copy_to_user(sp, mm->saved_auxv, ei_index * sizeof(elf_addr_t)))
		return -EFAULT;
	return 0;
}
364 
365 static unsigned long elf_map(struct file *filep, unsigned long addr,
366 		const struct elf_phdr *eppnt, int prot, int type,
367 		unsigned long total_size)
368 {
369 	unsigned long map_addr;
370 	unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
371 	unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
372 	addr = ELF_PAGESTART(addr);
373 	size = ELF_PAGEALIGN(size);
374 
375 	/* mmap() will return -EINVAL if given a zero size, but a
376 	 * segment with zero filesize is perfectly valid */
377 	if (!size)
378 		return addr;
379 
380 	/*
381 	* total_size is the size of the ELF (interpreter) image.
382 	* The _first_ mmap needs to know the full size, otherwise
383 	* randomization might put this image into an overlapping
384 	* position with the ELF binary image. (since size < total_size)
385 	* So we first map the 'big' image - and unmap the remainder at
386 	* the end. (which unmap is needed for ELF images with holes.)
387 	*/
388 	if (total_size) {
389 		total_size = ELF_PAGEALIGN(total_size);
390 		map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
391 		if (!BAD_ADDR(map_addr))
392 			vm_munmap(map_addr+size, total_size-size);
393 	} else
394 		map_addr = vm_mmap(filep, addr, size, prot, type, off);
395 
396 	if ((type & MAP_FIXED_NOREPLACE) &&
397 	    PTR_ERR((void *)map_addr) == -EEXIST)
398 		pr_info("%d (%s): Uhuuh, elf segment at %px requested but the memory is mapped already\n",
399 			task_pid_nr(current), current->comm, (void *)addr);
400 
401 	return(map_addr);
402 }
403 
404 static unsigned long total_mapping_size(const struct elf_phdr *phdr, int nr)
405 {
406 	elf_addr_t min_addr = -1;
407 	elf_addr_t max_addr = 0;
408 	bool pt_load = false;
409 	int i;
410 
411 	for (i = 0; i < nr; i++) {
412 		if (phdr[i].p_type == PT_LOAD) {
413 			min_addr = min(min_addr, ELF_PAGESTART(phdr[i].p_vaddr));
414 			max_addr = max(max_addr, phdr[i].p_vaddr + phdr[i].p_memsz);
415 			pt_load = true;
416 		}
417 	}
418 	return pt_load ? (max_addr - min_addr) : 0;
419 }
420 
421 static int elf_read(struct file *file, void *buf, size_t len, loff_t pos)
422 {
423 	ssize_t rv;
424 
425 	rv = kernel_read(file, buf, len, &pos);
426 	if (unlikely(rv != len)) {
427 		return (rv < 0) ? rv : -EIO;
428 	}
429 	return 0;
430 }
431 
432 static unsigned long maximum_alignment(struct elf_phdr *cmds, int nr)
433 {
434 	unsigned long alignment = 0;
435 	int i;
436 
437 	for (i = 0; i < nr; i++) {
438 		if (cmds[i].p_type == PT_LOAD) {
439 			unsigned long p_align = cmds[i].p_align;
440 
441 			/* skip non-power of two alignments as invalid */
442 			if (!is_power_of_2(p_align))
443 				continue;
444 			alignment = max(alignment, p_align);
445 		}
446 	}
447 
448 	/* ensure we align to at least one page */
449 	return ELF_PAGEALIGN(alignment);
450 }
451 
452 /**
453  * load_elf_phdrs() - load ELF program headers
454  * @elf_ex:   ELF header of the binary whose program headers should be loaded
455  * @elf_file: the opened ELF binary file
456  *
457  * Loads ELF program headers from the binary file elf_file, which has the ELF
458  * header pointed to by elf_ex, into a newly allocated array. The caller is
459  * responsible for freeing the allocated data. Returns an ERR_PTR upon failure.
460  */
461 static struct elf_phdr *load_elf_phdrs(const struct elfhdr *elf_ex,
462 				       struct file *elf_file)
463 {
464 	struct elf_phdr *elf_phdata = NULL;
465 	int retval, err = -1;
466 	unsigned int size;
467 
468 	/*
469 	 * If the size of this structure has changed, then punt, since
470 	 * we will be doing the wrong thing.
471 	 */
472 	if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
473 		goto out;
474 
475 	/* Sanity check the number of program headers... */
476 	/* ...and their total size. */
477 	size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
478 	if (size == 0 || size > 65536 || size > ELF_MIN_ALIGN)
479 		goto out;
480 
481 	elf_phdata = kmalloc(size, GFP_KERNEL);
482 	if (!elf_phdata)
483 		goto out;
484 
485 	/* Read in the program headers */
486 	retval = elf_read(elf_file, elf_phdata, size, elf_ex->e_phoff);
487 	if (retval < 0) {
488 		err = retval;
489 		goto out;
490 	}
491 
492 	/* Success! */
493 	err = 0;
494 out:
495 	if (err) {
496 		kfree(elf_phdata);
497 		elf_phdata = NULL;
498 	}
499 	return elf_phdata;
500 }
501 
502 #ifndef CONFIG_ARCH_BINFMT_ELF_STATE
503 
/**
 * struct arch_elf_state - arch-specific ELF loading state
 *
 * This structure is used to preserve architecture specific data during
 * the loading of an ELF file, throughout the checking of architecture
 * specific ELF headers & through to the point where the ELF load is
 * known to be proceeding (ie. SET_PERSONALITY).
 *
 * This implementation is a dummy for architectures which require no
 * specific state.  It is only compiled when CONFIG_ARCH_BINFMT_ELF_STATE
 * is not set.
 */
struct arch_elf_state {
};

/* Initializer for the dummy (member-less) struct arch_elf_state above. */
#define INIT_ARCH_ELF_STATE {}
519 
520 /**
521  * arch_elf_pt_proc() - check a PT_LOPROC..PT_HIPROC ELF program header
522  * @ehdr:	The main ELF header
523  * @phdr:	The program header to check
524  * @elf:	The open ELF file
525  * @is_interp:	True if the phdr is from the interpreter of the ELF being
526  *		loaded, else false.
527  * @state:	Architecture-specific state preserved throughout the process
528  *		of loading the ELF.
529  *
530  * Inspects the program header phdr to validate its correctness and/or
531  * suitability for the system. Called once per ELF program header in the
532  * range PT_LOPROC to PT_HIPROC, for both the ELF being loaded and its
533  * interpreter.
534  *
535  * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
536  *         with that return code.
537  */
538 static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
539 				   struct elf_phdr *phdr,
540 				   struct file *elf, bool is_interp,
541 				   struct arch_elf_state *state)
542 {
543 	/* Dummy implementation, always proceed */
544 	return 0;
545 }
546 
547 /**
548  * arch_check_elf() - check an ELF executable
549  * @ehdr:	The main ELF header
550  * @has_interp:	True if the ELF has an interpreter, else false.
551  * @interp_ehdr: The interpreter's ELF header
552  * @state:	Architecture-specific state preserved throughout the process
553  *		of loading the ELF.
554  *
555  * Provides a final opportunity for architecture code to reject the loading
556  * of the ELF & cause an exec syscall to return an error. This is called after
557  * all program headers to be checked by arch_elf_pt_proc have been.
558  *
559  * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
560  *         with that return code.
561  */
562 static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
563 				 struct elfhdr *interp_ehdr,
564 				 struct arch_elf_state *state)
565 {
566 	/* Dummy implementation, always proceed */
567 	return 0;
568 }
569 
570 #endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */
571 
572 static inline int make_prot(u32 p_flags, struct arch_elf_state *arch_state,
573 			    bool has_interp, bool is_interp)
574 {
575 	int prot = 0;
576 
577 	if (p_flags & PF_R)
578 		prot |= PROT_READ;
579 	if (p_flags & PF_W)
580 		prot |= PROT_WRITE;
581 	if (p_flags & PF_X)
582 		prot |= PROT_EXEC;
583 
584 	return arch_elf_adjust_prot(prot, arch_state, has_interp, is_interp);
585 }
586 
/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header */

/*
 * load_elf_interp() - map the ELF interpreter into the current address space
 * @interp_elf_ex:     ELF header of the interpreter
 * @interpreter:       the opened interpreter file
 * @no_base:           when non-zero and the interpreter is ET_DYN, the first
 *                     segment is requested at address 0 (load_addr is set to
 *                     -vaddr) instead of at its p_vaddr
 * @interp_elf_phdata: program headers of the interpreter
 * @arch_state:        arch-specific state, passed on to make_prot()
 *
 * Maps every PT_LOAD segment with elf_map(), zeroes the tail of the last
 * file-backed page and maps any remaining bss with vm_brk_flags().
 *
 * Return: the load address (offset applied to every p_vaddr) on success.
 * On failure an error value is returned in the unsigned long instead: ~0UL
 * when the initial consistency checks fail, the failing elf_map() result,
 * or a negative errno (-EINVAL, -ENOMEM, -EFAULT or the vm_brk_flags()
 * error).
 */
static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
		struct file *interpreter,
		unsigned long no_base, struct elf_phdr *interp_elf_phdata,
		struct arch_elf_state *arch_state)
{
	struct elf_phdr *eppnt;
	unsigned long load_addr = 0;
	int load_addr_set = 0;
	unsigned long last_bss = 0, elf_bss = 0;
	int bss_prot = 0;
	unsigned long error = ~0UL;
	unsigned long total_size;
	int i;

	/* First of all, some simple consistency checks */
	if (interp_elf_ex->e_type != ET_EXEC &&
	    interp_elf_ex->e_type != ET_DYN)
		goto out;
	if (!elf_check_arch(interp_elf_ex) ||
	    elf_check_fdpic(interp_elf_ex))
		goto out;
	if (!interpreter->f_op->mmap)
		goto out;

	/* An interpreter without any PT_LOAD segment cannot be mapped. */
	total_size = total_mapping_size(interp_elf_phdata,
					interp_elf_ex->e_phnum);
	if (!total_size) {
		error = -EINVAL;
		goto out;
	}

	eppnt = interp_elf_phdata;
	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
		if (eppnt->p_type == PT_LOAD) {
			int elf_type = MAP_PRIVATE;
			int elf_prot = make_prot(eppnt->p_flags, arch_state,
						 true, true);
			unsigned long vaddr = 0;
			unsigned long k, map_addr;

			vaddr = eppnt->p_vaddr;
			/*
			 * ET_EXEC, and every segment after the first one of
			 * an ET_DYN image, must land at a fixed address.
			 */
			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
				elf_type |= MAP_FIXED;
			else if (no_base && interp_elf_ex->e_type == ET_DYN)
				load_addr = -vaddr;

			map_addr = elf_map(interpreter, load_addr + vaddr,
					eppnt, elf_prot, elf_type, total_size);
			/* Only the first mapping reserves the whole image. */
			total_size = 0;
			error = map_addr;
			if (BAD_ADDR(map_addr))
				goto out;

			/* The first ET_DYN mapping fixes the load address. */
			if (!load_addr_set &&
			    interp_elf_ex->e_type == ET_DYN) {
				load_addr = map_addr - ELF_PAGESTART(vaddr);
				load_addr_set = 1;
			}

			/*
			 * Check to see if the section's size will overflow the
			 * allowed task size. Note that p_filesz must always be
			 * <= p_memsz so it's only necessary to check p_memsz.
			 */
			k = load_addr + eppnt->p_vaddr;
			if (BAD_ADDR(k) ||
			    eppnt->p_filesz > eppnt->p_memsz ||
			    eppnt->p_memsz > TASK_SIZE ||
			    TASK_SIZE - eppnt->p_memsz < k) {
				error = -ENOMEM;
				goto out;
			}

			/*
			 * Find the end of the file mapping for this phdr, and
			 * keep track of the largest address we see for this.
			 */
			k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
			if (k > elf_bss)
				elf_bss = k;

			/*
			 * Do the same thing for the memory mapping - between
			 * elf_bss and last_bss is the bss section.
			 */
			k = load_addr + eppnt->p_vaddr + eppnt->p_memsz;
			if (k > last_bss) {
				last_bss = k;
				/* The bss takes the protection of the segment that ends last. */
				bss_prot = elf_prot;
			}
		}
	}

	/*
	 * Now fill out the bss section: first pad the last page from
	 * the file up to the page boundary, and zero it from elf_bss
	 * up to the end of the page.
	 */
	if (padzero(elf_bss)) {
		error = -EFAULT;
		goto out;
	}
	/*
	 * Next, align both the file and mem bss up to the page size,
	 * since this is where elf_bss was just zeroed up to, and where
	 * last_bss will end after the vm_brk_flags() below.
	 */
	elf_bss = ELF_PAGEALIGN(elf_bss);
	last_bss = ELF_PAGEALIGN(last_bss);
	/* Finally, if there is still more bss to allocate, do it. */
	if (last_bss > elf_bss) {
		error = vm_brk_flags(elf_bss, last_bss - elf_bss,
				bss_prot & PROT_EXEC ? VM_EXEC : 0);
		if (error)
			goto out;
	}

	/* Success: hand the load address back to the caller. */
	error = load_addr;
out:
	return error;
}
713 
714 /*
715  * These are the functions used to load ELF style executables and shared
716  * libraries.  There is no binary dependent code anywhere else.
717  */
718 
719 static int parse_elf_property(const char *data, size_t *off, size_t datasz,
720 			      struct arch_elf_state *arch,
721 			      bool have_prev_type, u32 *prev_type)
722 {
723 	size_t o, step;
724 	const struct gnu_property *pr;
725 	int ret;
726 
727 	if (*off == datasz)
728 		return -ENOENT;
729 
730 	if (WARN_ON_ONCE(*off > datasz || *off % ELF_GNU_PROPERTY_ALIGN))
731 		return -EIO;
732 	o = *off;
733 	datasz -= *off;
734 
735 	if (datasz < sizeof(*pr))
736 		return -ENOEXEC;
737 	pr = (const struct gnu_property *)(data + o);
738 	o += sizeof(*pr);
739 	datasz -= sizeof(*pr);
740 
741 	if (pr->pr_datasz > datasz)
742 		return -ENOEXEC;
743 
744 	WARN_ON_ONCE(o % ELF_GNU_PROPERTY_ALIGN);
745 	step = round_up(pr->pr_datasz, ELF_GNU_PROPERTY_ALIGN);
746 	if (step > datasz)
747 		return -ENOEXEC;
748 
749 	/* Properties are supposed to be unique and sorted on pr_type: */
750 	if (have_prev_type && pr->pr_type <= *prev_type)
751 		return -ENOEXEC;
752 	*prev_type = pr->pr_type;
753 
754 	ret = arch_parse_elf_property(pr->pr_type, data + o,
755 				      pr->pr_datasz, ELF_COMPAT, arch);
756 	if (ret)
757 		return ret;
758 
759 	*off = o + step;
760 	return 0;
761 }
762 
/* Size of the on-stack buffer the property note is read into. */
#define NOTE_DATA_SZ SZ_1K
/* Owner name a GNU property note must carry. */
#define GNU_PROPERTY_TYPE_0_NAME "GNU"
/* Size of that name as stored in the note, terminating NUL included. */
#define NOTE_NAME_SZ (sizeof(GNU_PROPERTY_TYPE_0_NAME))

/*
 * parse_elf_properties() - read and parse the GNU property note of an ELF
 * @f:    file to read the note from
 * @phdr: the PT_GNU_PROPERTY program header, or NULL if there is none
 * @arch: arch-specific state handed on to parse_elf_property()
 *
 * Reads the segment described by @phdr into a stack buffer, checks that it
 * is an NT_GNU_PROPERTY_TYPE_0 note owned by "GNU" and then feeds its
 * records one by one to parse_elf_property().
 *
 * Return: 0 when there is nothing to do (CONFIG_ARCH_USE_GNU_PROPERTY off
 * or @phdr NULL) or all records were parsed; -ENOEXEC for a malformed or
 * oversized note; -EIO when the read fails or returns too little to hold the
 * note header and name; otherwise the error from parse_elf_property().
 */
static int parse_elf_properties(struct file *f, const struct elf_phdr *phdr,
				struct arch_elf_state *arch)
{
	/* The same bytes viewed either as a note header or as raw data. */
	union {
		struct elf_note nhdr;
		char data[NOTE_DATA_SZ];
	} note;
	loff_t pos;
	ssize_t n;
	size_t off, datasz;
	int ret;
	bool have_prev_type;
	u32 prev_type;

	if (!IS_ENABLED(CONFIG_ARCH_USE_GNU_PROPERTY) || !phdr)
		return 0;

	/* load_elf_binary() shouldn't call us unless this is true... */
	if (WARN_ON_ONCE(phdr->p_type != PT_GNU_PROPERTY))
		return -ENOEXEC;

	/* If the properties are crazy large, that's too bad (for now): */
	if (phdr->p_filesz > sizeof(note))
		return -ENOEXEC;

	pos = phdr->p_offset;
	n = kernel_read(f, &note, phdr->p_filesz, &pos);

	BUILD_BUG_ON(sizeof(note) < sizeof(note.nhdr) + NOTE_NAME_SZ);
	/* We need at least the note header followed by the owner name. */
	if (n < 0 || n < sizeof(note.nhdr) + NOTE_NAME_SZ)
		return -EIO;

	/* The owner name is stored directly after the note header. */
	if (note.nhdr.n_type != NT_GNU_PROPERTY_TYPE_0 ||
	    note.nhdr.n_namesz != NOTE_NAME_SZ ||
	    strncmp(note.data + sizeof(note.nhdr),
		    GNU_PROPERTY_TYPE_0_NAME, n - sizeof(note.nhdr)))
		return -ENOEXEC;

	/* The descriptor (the property array) starts at the next aligned offset. */
	off = round_up(sizeof(note.nhdr) + NOTE_NAME_SZ,
		       ELF_GNU_PROPERTY_ALIGN);
	if (off > n)
		return -ENOEXEC;

	/* The descriptor must fit inside what was actually read. */
	if (note.nhdr.n_descsz > n - off)
		return -ENOEXEC;
	datasz = off + note.nhdr.n_descsz;

	/* Walk the records until parse_elf_property() stops with non-zero. */
	have_prev_type = false;
	do {
		ret = parse_elf_property(note.data, &off, datasz, arch,
					 have_prev_type, &prev_type);
		have_prev_type = true;
	} while (!ret);

	/* -ENOENT only marks the regular end of the property list. */
	return ret == -ENOENT ? 0 : ret;
}
823 
824 static int load_elf_binary(struct linux_binprm *bprm)
825 {
826 	struct file *interpreter = NULL; /* to shut gcc up */
827 	unsigned long load_bias = 0, phdr_addr = 0;
828 	int first_pt_load = 1;
829 	unsigned long error;
830 	struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
831 	struct elf_phdr *elf_property_phdata = NULL;
832 	unsigned long elf_bss, elf_brk;
833 	int bss_prot = 0;
834 	int retval, i;
835 	unsigned long elf_entry;
836 	unsigned long e_entry;
837 	unsigned long interp_load_addr = 0;
838 	unsigned long start_code, end_code, start_data, end_data;
839 	unsigned long reloc_func_desc __maybe_unused = 0;
840 	int executable_stack = EXSTACK_DEFAULT;
841 	struct elfhdr *elf_ex = (struct elfhdr *)bprm->buf;
842 	struct elfhdr *interp_elf_ex = NULL;
843 	struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
844 	struct mm_struct *mm;
845 	struct pt_regs *regs;
846 
847 	retval = -ENOEXEC;
848 	/* First of all, some simple consistency checks */
849 	if (memcmp(elf_ex->e_ident, ELFMAG, SELFMAG) != 0)
850 		goto out;
851 
852 	if (elf_ex->e_type != ET_EXEC && elf_ex->e_type != ET_DYN)
853 		goto out;
854 	if (!elf_check_arch(elf_ex))
855 		goto out;
856 	if (elf_check_fdpic(elf_ex))
857 		goto out;
858 	if (!bprm->file->f_op->mmap)
859 		goto out;
860 
861 	elf_phdata = load_elf_phdrs(elf_ex, bprm->file);
862 	if (!elf_phdata)
863 		goto out;
864 
865 	elf_ppnt = elf_phdata;
866 	for (i = 0; i < elf_ex->e_phnum; i++, elf_ppnt++) {
867 		char *elf_interpreter;
868 
869 		if (elf_ppnt->p_type == PT_GNU_PROPERTY) {
870 			elf_property_phdata = elf_ppnt;
871 			continue;
872 		}
873 
874 		if (elf_ppnt->p_type != PT_INTERP)
875 			continue;
876 
877 		/*
878 		 * This is the program interpreter used for shared libraries -
879 		 * for now assume that this is an a.out format binary.
880 		 */
881 		retval = -ENOEXEC;
882 		if (elf_ppnt->p_filesz > PATH_MAX || elf_ppnt->p_filesz < 2)
883 			goto out_free_ph;
884 
885 		retval = -ENOMEM;
886 		elf_interpreter = kmalloc(elf_ppnt->p_filesz, GFP_KERNEL);
887 		if (!elf_interpreter)
888 			goto out_free_ph;
889 
890 		retval = elf_read(bprm->file, elf_interpreter, elf_ppnt->p_filesz,
891 				  elf_ppnt->p_offset);
892 		if (retval < 0)
893 			goto out_free_interp;
894 		/* make sure path is NULL terminated */
895 		retval = -ENOEXEC;
896 		if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
897 			goto out_free_interp;
898 
899 		interpreter = open_exec(elf_interpreter);
900 		kfree(elf_interpreter);
901 		retval = PTR_ERR(interpreter);
902 		if (IS_ERR(interpreter))
903 			goto out_free_ph;
904 
905 		/*
906 		 * If the binary is not readable then enforce mm->dumpable = 0
907 		 * regardless of the interpreter's permissions.
908 		 */
909 		would_dump(bprm, interpreter);
910 
911 		interp_elf_ex = kmalloc(sizeof(*interp_elf_ex), GFP_KERNEL);
912 		if (!interp_elf_ex) {
913 			retval = -ENOMEM;
914 			goto out_free_ph;
915 		}
916 
917 		/* Get the exec headers */
918 		retval = elf_read(interpreter, interp_elf_ex,
919 				  sizeof(*interp_elf_ex), 0);
920 		if (retval < 0)
921 			goto out_free_dentry;
922 
923 		break;
924 
925 out_free_interp:
926 		kfree(elf_interpreter);
927 		goto out_free_ph;
928 	}
929 
930 	elf_ppnt = elf_phdata;
931 	for (i = 0; i < elf_ex->e_phnum; i++, elf_ppnt++)
932 		switch (elf_ppnt->p_type) {
933 		case PT_GNU_STACK:
934 			if (elf_ppnt->p_flags & PF_X)
935 				executable_stack = EXSTACK_ENABLE_X;
936 			else
937 				executable_stack = EXSTACK_DISABLE_X;
938 			break;
939 
940 		case PT_LOPROC ... PT_HIPROC:
941 			retval = arch_elf_pt_proc(elf_ex, elf_ppnt,
942 						  bprm->file, false,
943 						  &arch_state);
944 			if (retval)
945 				goto out_free_dentry;
946 			break;
947 		}
948 
949 	/* Some simple consistency checks for the interpreter */
950 	if (interpreter) {
951 		retval = -ELIBBAD;
952 		/* Not an ELF interpreter */
953 		if (memcmp(interp_elf_ex->e_ident, ELFMAG, SELFMAG) != 0)
954 			goto out_free_dentry;
955 		/* Verify the interpreter has a valid arch */
956 		if (!elf_check_arch(interp_elf_ex) ||
957 		    elf_check_fdpic(interp_elf_ex))
958 			goto out_free_dentry;
959 
960 		/* Load the interpreter program headers */
961 		interp_elf_phdata = load_elf_phdrs(interp_elf_ex,
962 						   interpreter);
963 		if (!interp_elf_phdata)
964 			goto out_free_dentry;
965 
966 		/* Pass PT_LOPROC..PT_HIPROC headers to arch code */
967 		elf_property_phdata = NULL;
968 		elf_ppnt = interp_elf_phdata;
969 		for (i = 0; i < interp_elf_ex->e_phnum; i++, elf_ppnt++)
970 			switch (elf_ppnt->p_type) {
971 			case PT_GNU_PROPERTY:
972 				elf_property_phdata = elf_ppnt;
973 				break;
974 
975 			case PT_LOPROC ... PT_HIPROC:
976 				retval = arch_elf_pt_proc(interp_elf_ex,
977 							  elf_ppnt, interpreter,
978 							  true, &arch_state);
979 				if (retval)
980 					goto out_free_dentry;
981 				break;
982 			}
983 	}
984 
985 	retval = parse_elf_properties(interpreter ?: bprm->file,
986 				      elf_property_phdata, &arch_state);
987 	if (retval)
988 		goto out_free_dentry;
989 
990 	/*
991 	 * Allow arch code to reject the ELF at this point, whilst it's
992 	 * still possible to return an error to the code that invoked
993 	 * the exec syscall.
994 	 */
995 	retval = arch_check_elf(elf_ex,
996 				!!interpreter, interp_elf_ex,
997 				&arch_state);
998 	if (retval)
999 		goto out_free_dentry;
1000 
1001 	/* Flush all traces of the currently running executable */
1002 	retval = begin_new_exec(bprm);
1003 	if (retval)
1004 		goto out_free_dentry;
1005 
1006 	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
1007 	   may depend on the personality.  */
1008 	SET_PERSONALITY2(*elf_ex, &arch_state);
1009 	if (elf_read_implies_exec(*elf_ex, executable_stack))
1010 		current->personality |= READ_IMPLIES_EXEC;
1011 
1012 	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
1013 		current->flags |= PF_RANDOMIZE;
1014 
1015 	setup_new_exec(bprm);
1016 
1017 	/* Do this so that we can load the interpreter, if need be.  We will
1018 	   change some of these later */
1019 	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
1020 				 executable_stack);
1021 	if (retval < 0)
1022 		goto out_free_dentry;
1023 
1024 	elf_bss = 0;
1025 	elf_brk = 0;
1026 
1027 	start_code = ~0UL;
1028 	end_code = 0;
1029 	start_data = 0;
1030 	end_data = 0;
1031 
1032 	/* Now we do a little grungy work by mmapping the ELF image into
1033 	   the correct location in memory. */
1034 	for(i = 0, elf_ppnt = elf_phdata;
1035 	    i < elf_ex->e_phnum; i++, elf_ppnt++) {
1036 		int elf_prot, elf_flags;
1037 		unsigned long k, vaddr;
1038 		unsigned long total_size = 0;
1039 		unsigned long alignment;
1040 
1041 		if (elf_ppnt->p_type != PT_LOAD)
1042 			continue;
1043 
1044 		if (unlikely (elf_brk > elf_bss)) {
1045 			unsigned long nbyte;
1046 
1047 			/* There was a PT_LOAD segment with p_memsz > p_filesz
1048 			   before this one. Map anonymous pages, if needed,
1049 			   and clear the area.  */
1050 			retval = set_brk(elf_bss + load_bias,
1051 					 elf_brk + load_bias,
1052 					 bss_prot);
1053 			if (retval)
1054 				goto out_free_dentry;
1055 			nbyte = ELF_PAGEOFFSET(elf_bss);
1056 			if (nbyte) {
1057 				nbyte = ELF_MIN_ALIGN - nbyte;
1058 				if (nbyte > elf_brk - elf_bss)
1059 					nbyte = elf_brk - elf_bss;
1060 				if (clear_user((void __user *)elf_bss +
1061 							load_bias, nbyte)) {
1062 					/*
1063 					 * This bss-zeroing can fail if the ELF
1064 					 * file specifies odd protections. So
1065 					 * we don't check the return value
1066 					 */
1067 				}
1068 			}
1069 		}
1070 
1071 		elf_prot = make_prot(elf_ppnt->p_flags, &arch_state,
1072 				     !!interpreter, false);
1073 
1074 		elf_flags = MAP_PRIVATE;
1075 
1076 		vaddr = elf_ppnt->p_vaddr;
1077 		/*
1078 		 * The first time through the loop, first_pt_load is true:
1079 		 * layout will be calculated. Once set, use MAP_FIXED since
1080 		 * we know we've already safely mapped the entire region with
1081 		 * MAP_FIXED_NOREPLACE in the once-per-binary logic following.
1082 		 */
1083 		if (!first_pt_load) {
1084 			elf_flags |= MAP_FIXED;
1085 		} else if (elf_ex->e_type == ET_EXEC) {
1086 			/*
1087 			 * This logic is run once for the first LOAD Program
1088 			 * Header for ET_EXEC binaries. No special handling
1089 			 * is needed.
1090 			 */
1091 			elf_flags |= MAP_FIXED_NOREPLACE;
1092 		} else if (elf_ex->e_type == ET_DYN) {
1093 			/*
1094 			 * This logic is run once for the first LOAD Program
1095 			 * Header for ET_DYN binaries to calculate the
1096 			 * randomization (load_bias) for all the LOAD
1097 			 * Program Headers.
1098 			 *
1099 			 * There are effectively two types of ET_DYN
1100 			 * binaries: programs (i.e. PIE: ET_DYN with INTERP)
1101 			 * and loaders (ET_DYN without INTERP, since they
1102 			 * _are_ the ELF interpreter). The loaders must
1103 			 * be loaded away from programs since the program
1104 			 * may otherwise collide with the loader (especially
1105 			 * for ET_EXEC which does not have a randomized
1106 			 * position). For example to handle invocations of
1107 			 * "./ld.so someprog" to test out a new version of
1108 			 * the loader, the subsequent program that the
1109 			 * loader loads must avoid the loader itself, so
1110 			 * they cannot share the same load range. Sufficient
1111 			 * room for the brk must be allocated with the
1112 			 * loader as well, since brk must be available with
1113 			 * the loader.
1114 			 *
1115 			 * Therefore, programs are loaded offset from
1116 			 * ELF_ET_DYN_BASE and loaders are loaded into the
1117 			 * independently randomized mmap region (0 load_bias
1118 			 * without MAP_FIXED nor MAP_FIXED_NOREPLACE).
1119 			 */
1120 			if (interpreter) {
1121 				load_bias = ELF_ET_DYN_BASE;
1122 				if (current->flags & PF_RANDOMIZE)
1123 					load_bias += arch_mmap_rnd();
1124 				alignment = maximum_alignment(elf_phdata, elf_ex->e_phnum);
1125 				if (alignment)
1126 					load_bias &= ~(alignment - 1);
1127 				elf_flags |= MAP_FIXED_NOREPLACE;
1128 			} else
1129 				load_bias = 0;
1130 
1131 			/*
1132 			 * Since load_bias is used for all subsequent loading
1133 			 * calculations, we must lower it by the first vaddr
1134 			 * so that the remaining calculations based on the
1135 			 * ELF vaddrs will be correctly offset. The result
1136 			 * is then page aligned.
1137 			 */
1138 			load_bias = ELF_PAGESTART(load_bias - vaddr);
1139 
1140 			/*
1141 			 * Calculate the entire size of the ELF mapping
1142 			 * (total_size), used for the initial mapping,
1143 			 * due to load_addr_set which is set to true later
1144 			 * once the initial mapping is performed.
1145 			 *
1146 			 * Note that this is only sensible when the LOAD
1147 			 * segments are contiguous (or overlapping). If
1148 			 * used for LOADs that are far apart, this would
1149 			 * cause the holes between LOADs to be mapped,
1150 			 * running the risk of having the mapping fail,
1151 			 * as it would be larger than the ELF file itself.
1152 			 *
1153 			 * As a result, only ET_DYN does this, since
1154 			 * some ET_EXEC (e.g. ia64) may have large virtual
1155 			 * memory holes between LOADs.
1156 			 *
1157 			 */
1158 			total_size = total_mapping_size(elf_phdata,
1159 							elf_ex->e_phnum);
1160 			if (!total_size) {
1161 				retval = -EINVAL;
1162 				goto out_free_dentry;
1163 			}
1164 		}
1165 
1166 		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
1167 				elf_prot, elf_flags, total_size);
1168 		if (BAD_ADDR(error)) {
1169 			retval = IS_ERR((void *)error) ?
1170 				PTR_ERR((void*)error) : -EINVAL;
1171 			goto out_free_dentry;
1172 		}
1173 
1174 		if (first_pt_load) {
1175 			first_pt_load = 0;
1176 			if (elf_ex->e_type == ET_DYN) {
1177 				load_bias += error -
1178 				             ELF_PAGESTART(load_bias + vaddr);
1179 				reloc_func_desc = load_bias;
1180 			}
1181 		}
1182 
1183 		/*
1184 		 * Figure out which segment in the file contains the Program
1185 		 * Header table, and map to the associated memory address.
1186 		 */
1187 		if (elf_ppnt->p_offset <= elf_ex->e_phoff &&
1188 		    elf_ex->e_phoff < elf_ppnt->p_offset + elf_ppnt->p_filesz) {
1189 			phdr_addr = elf_ex->e_phoff - elf_ppnt->p_offset +
1190 				    elf_ppnt->p_vaddr;
1191 		}
1192 
1193 		k = elf_ppnt->p_vaddr;
1194 		if ((elf_ppnt->p_flags & PF_X) && k < start_code)
1195 			start_code = k;
1196 		if (start_data < k)
1197 			start_data = k;
1198 
1199 		/*
1200 		 * Check to see if the section's size will overflow the
1201 		 * allowed task size. Note that p_filesz must always be
1202 		 * <= p_memsz so it is only necessary to check p_memsz.
1203 		 */
1204 		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
1205 		    elf_ppnt->p_memsz > TASK_SIZE ||
1206 		    TASK_SIZE - elf_ppnt->p_memsz < k) {
1207 			/* set_brk can never work. Avoid overflows. */
1208 			retval = -EINVAL;
1209 			goto out_free_dentry;
1210 		}
1211 
1212 		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
1213 
1214 		if (k > elf_bss)
1215 			elf_bss = k;
1216 		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
1217 			end_code = k;
1218 		if (end_data < k)
1219 			end_data = k;
1220 		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
1221 		if (k > elf_brk) {
1222 			bss_prot = elf_prot;
1223 			elf_brk = k;
1224 		}
1225 	}
1226 
1227 	e_entry = elf_ex->e_entry + load_bias;
1228 	phdr_addr += load_bias;
1229 	elf_bss += load_bias;
1230 	elf_brk += load_bias;
1231 	start_code += load_bias;
1232 	end_code += load_bias;
1233 	start_data += load_bias;
1234 	end_data += load_bias;
1235 
1236 	/* Calling set_brk effectively mmaps the pages that we need
1237 	 * for the bss and break sections.  We must do this before
1238 	 * mapping in the interpreter, to make sure it doesn't wind
1239 	 * up getting placed where the bss needs to go.
1240 	 */
1241 	retval = set_brk(elf_bss, elf_brk, bss_prot);
1242 	if (retval)
1243 		goto out_free_dentry;
1244 	if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
1245 		retval = -EFAULT; /* Nobody gets to see this, but.. */
1246 		goto out_free_dentry;
1247 	}
1248 
1249 	if (interpreter) {
1250 		elf_entry = load_elf_interp(interp_elf_ex,
1251 					    interpreter,
1252 					    load_bias, interp_elf_phdata,
1253 					    &arch_state);
1254 		if (!IS_ERR((void *)elf_entry)) {
1255 			/*
1256 			 * load_elf_interp() returns relocation
1257 			 * adjustment
1258 			 */
1259 			interp_load_addr = elf_entry;
1260 			elf_entry += interp_elf_ex->e_entry;
1261 		}
1262 		if (BAD_ADDR(elf_entry)) {
1263 			retval = IS_ERR((void *)elf_entry) ?
1264 					(int)elf_entry : -EINVAL;
1265 			goto out_free_dentry;
1266 		}
1267 		reloc_func_desc = interp_load_addr;
1268 
1269 		allow_write_access(interpreter);
1270 		fput(interpreter);
1271 
1272 		kfree(interp_elf_ex);
1273 		kfree(interp_elf_phdata);
1274 	} else {
1275 		elf_entry = e_entry;
1276 		if (BAD_ADDR(elf_entry)) {
1277 			retval = -EINVAL;
1278 			goto out_free_dentry;
1279 		}
1280 	}
1281 
1282 	kfree(elf_phdata);
1283 
1284 	set_binfmt(&elf_format);
1285 
1286 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
1287 	retval = ARCH_SETUP_ADDITIONAL_PAGES(bprm, elf_ex, !!interpreter);
1288 	if (retval < 0)
1289 		goto out;
1290 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
1291 
1292 	retval = create_elf_tables(bprm, elf_ex, interp_load_addr,
1293 				   e_entry, phdr_addr);
1294 	if (retval < 0)
1295 		goto out;
1296 
1297 	mm = current->mm;
1298 	mm->end_code = end_code;
1299 	mm->start_code = start_code;
1300 	mm->start_data = start_data;
1301 	mm->end_data = end_data;
1302 	mm->start_stack = bprm->p;
1303 
1304 	if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
1305 		/*
1306 		 * For architectures with ELF randomization, when executing
1307 		 * a loader directly (i.e. no interpreter listed in ELF
1308 		 * headers), move the brk area out of the mmap region
1309 		 * (since it grows up, and may collide early with the stack
1310 		 * growing down), and into the unused ELF_ET_DYN_BASE region.
1311 		 */
1312 		if (IS_ENABLED(CONFIG_ARCH_HAS_ELF_RANDOMIZE) &&
1313 		    elf_ex->e_type == ET_DYN && !interpreter) {
1314 			mm->brk = mm->start_brk = ELF_ET_DYN_BASE;
1315 		}
1316 
1317 		mm->brk = mm->start_brk = arch_randomize_brk(mm);
1318 #ifdef compat_brk_randomized
1319 		current->brk_randomized = 1;
1320 #endif
1321 	}
1322 
1323 	if (current->personality & MMAP_PAGE_ZERO) {
1324 		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
1325 		   and some applications "depend" upon this behavior.
1326 		   Since we do not have the power to recompile these, we
1327 		   emulate the SVr4 behavior. Sigh. */
1328 		error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
1329 				MAP_FIXED | MAP_PRIVATE, 0);
1330 	}
1331 
1332 	regs = current_pt_regs();
1333 #ifdef ELF_PLAT_INIT
1334 	/*
1335 	 * The ABI may specify that certain registers be set up in special
1336 	 * ways (on i386 %edx is the address of a DT_FINI function, for
1337 	 * example.  In addition, it may also specify (eg, PowerPC64 ELF)
1338 	 * that the e_entry field is the address of the function descriptor
1339 	 * for the startup routine, rather than the address of the startup
1340 	 * routine itself.  This macro performs whatever initialization to
1341 	 * the regs structure is required as well as any relocations to the
1342 	 * function descriptor entries when executing dynamically links apps.
1343 	 */
1344 	ELF_PLAT_INIT(regs, reloc_func_desc);
1345 #endif
1346 
1347 	finalize_exec(bprm);
1348 	START_THREAD(elf_ex, regs, elf_entry, bprm->p);
1349 	retval = 0;
1350 out:
1351 	return retval;
1352 
1353 	/* error cleanup */
1354 out_free_dentry:
1355 	kfree(interp_elf_ex);
1356 	kfree(interp_elf_phdata);
1357 	allow_write_access(interpreter);
1358 	if (interpreter)
1359 		fput(interpreter);
1360 out_free_ph:
1361 	kfree(elf_phdata);
1362 	goto out;
1363 }
1364 
1365 #ifdef CONFIG_USELIB
1366 /* This is really simpleminded and specialized - we are loading an
1367    a.out library that is given an ELF header. */
1368 static int load_elf_library(struct file *file)
1369 {
1370 	struct elf_phdr *elf_phdata;
1371 	struct elf_phdr *eppnt;
1372 	unsigned long elf_bss, bss, len;
1373 	int retval, error, i, j;
1374 	struct elfhdr elf_ex;
1375 
1376 	error = -ENOEXEC;
1377 	retval = elf_read(file, &elf_ex, sizeof(elf_ex), 0);
1378 	if (retval < 0)
1379 		goto out;
1380 
1381 	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1382 		goto out;
1383 
1384 	/* First of all, some simple consistency checks */
1385 	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1386 	    !elf_check_arch(&elf_ex) || !file->f_op->mmap)
1387 		goto out;
1388 	if (elf_check_fdpic(&elf_ex))
1389 		goto out;
1390 
1391 	/* Now read in all of the header information */
1392 
1393 	j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1394 	/* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1395 
1396 	error = -ENOMEM;
1397 	elf_phdata = kmalloc(j, GFP_KERNEL);
1398 	if (!elf_phdata)
1399 		goto out;
1400 
1401 	eppnt = elf_phdata;
1402 	error = -ENOEXEC;
1403 	retval = elf_read(file, eppnt, j, elf_ex.e_phoff);
1404 	if (retval < 0)
1405 		goto out_free_ph;
1406 
1407 	for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1408 		if ((eppnt + i)->p_type == PT_LOAD)
1409 			j++;
1410 	if (j != 1)
1411 		goto out_free_ph;
1412 
1413 	while (eppnt->p_type != PT_LOAD)
1414 		eppnt++;
1415 
1416 	/* Now use mmap to map the library into memory. */
1417 	error = vm_mmap(file,
1418 			ELF_PAGESTART(eppnt->p_vaddr),
1419 			(eppnt->p_filesz +
1420 			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
1421 			PROT_READ | PROT_WRITE | PROT_EXEC,
1422 			MAP_FIXED_NOREPLACE | MAP_PRIVATE,
1423 			(eppnt->p_offset -
1424 			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
1425 	if (error != ELF_PAGESTART(eppnt->p_vaddr))
1426 		goto out_free_ph;
1427 
1428 	elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1429 	if (padzero(elf_bss)) {
1430 		error = -EFAULT;
1431 		goto out_free_ph;
1432 	}
1433 
1434 	len = ELF_PAGEALIGN(eppnt->p_filesz + eppnt->p_vaddr);
1435 	bss = ELF_PAGEALIGN(eppnt->p_memsz + eppnt->p_vaddr);
1436 	if (bss > len) {
1437 		error = vm_brk(len, bss - len);
1438 		if (error)
1439 			goto out_free_ph;
1440 	}
1441 	error = 0;
1442 
1443 out_free_ph:
1444 	kfree(elf_phdata);
1445 out:
1446 	return error;
1447 }
1448 #endif /* #ifdef CONFIG_USELIB */
1449 
1450 #ifdef CONFIG_ELF_CORE
1451 /*
1452  * ELF core dumper
1453  *
1454  * Modelled on fs/exec.c:aout_core_dump()
1455  * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1456  */
1457 
/* In-memory description of one ELF note that is waiting to be written out */
struct memelfnote {
	const char *name;	/* note name, a NUL-terminated string */
	int type;		/* note type, written out as n_type */
	unsigned int datasz;	/* number of bytes at @data */
	void *data;		/* note payload */
};
1466 
1467 static int notesize(struct memelfnote *en)
1468 {
1469 	int sz;
1470 
1471 	sz = sizeof(struct elf_note);
1472 	sz += roundup(strlen(en->name) + 1, 4);
1473 	sz += roundup(en->datasz, 4);
1474 
1475 	return sz;
1476 }
1477 
1478 static int writenote(struct memelfnote *men, struct coredump_params *cprm)
1479 {
1480 	struct elf_note en;
1481 	en.n_namesz = strlen(men->name) + 1;
1482 	en.n_descsz = men->datasz;
1483 	en.n_type = men->type;
1484 
1485 	return dump_emit(cprm, &en, sizeof(en)) &&
1486 	    dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
1487 	    dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
1488 }
1489 
1490 static void fill_elf_header(struct elfhdr *elf, int segs,
1491 			    u16 machine, u32 flags)
1492 {
1493 	memset(elf, 0, sizeof(*elf));
1494 
1495 	memcpy(elf->e_ident, ELFMAG, SELFMAG);
1496 	elf->e_ident[EI_CLASS] = ELF_CLASS;
1497 	elf->e_ident[EI_DATA] = ELF_DATA;
1498 	elf->e_ident[EI_VERSION] = EV_CURRENT;
1499 	elf->e_ident[EI_OSABI] = ELF_OSABI;
1500 
1501 	elf->e_type = ET_CORE;
1502 	elf->e_machine = machine;
1503 	elf->e_version = EV_CURRENT;
1504 	elf->e_phoff = sizeof(struct elfhdr);
1505 	elf->e_flags = flags;
1506 	elf->e_ehsize = sizeof(struct elfhdr);
1507 	elf->e_phentsize = sizeof(struct elf_phdr);
1508 	elf->e_phnum = segs;
1509 }
1510 
1511 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1512 {
1513 	phdr->p_type = PT_NOTE;
1514 	phdr->p_offset = offset;
1515 	phdr->p_vaddr = 0;
1516 	phdr->p_paddr = 0;
1517 	phdr->p_filesz = sz;
1518 	phdr->p_memsz = 0;
1519 	phdr->p_flags = 0;
1520 	phdr->p_align = 0;
1521 }
1522 
1523 static void fill_note(struct memelfnote *note, const char *name, int type,
1524 		unsigned int sz, void *data)
1525 {
1526 	note->name = name;
1527 	note->type = type;
1528 	note->datasz = sz;
1529 	note->data = data;
1530 }
1531 
1532 /*
1533  * fill up all the fields in prstatus from the given task struct, except
1534  * registers which need to be filled up separately.
1535  */
1536 static void fill_prstatus(struct elf_prstatus_common *prstatus,
1537 		struct task_struct *p, long signr)
1538 {
1539 	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1540 	prstatus->pr_sigpend = p->pending.signal.sig[0];
1541 	prstatus->pr_sighold = p->blocked.sig[0];
1542 	rcu_read_lock();
1543 	prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1544 	rcu_read_unlock();
1545 	prstatus->pr_pid = task_pid_vnr(p);
1546 	prstatus->pr_pgrp = task_pgrp_vnr(p);
1547 	prstatus->pr_sid = task_session_vnr(p);
1548 	if (thread_group_leader(p)) {
1549 		struct task_cputime cputime;
1550 
1551 		/*
1552 		 * This is the record for the group leader.  It shows the
1553 		 * group-wide total, not its individual thread total.
1554 		 */
1555 		thread_group_cputime(p, &cputime);
1556 		prstatus->pr_utime = ns_to_kernel_old_timeval(cputime.utime);
1557 		prstatus->pr_stime = ns_to_kernel_old_timeval(cputime.stime);
1558 	} else {
1559 		u64 utime, stime;
1560 
1561 		task_cputime(p, &utime, &stime);
1562 		prstatus->pr_utime = ns_to_kernel_old_timeval(utime);
1563 		prstatus->pr_stime = ns_to_kernel_old_timeval(stime);
1564 	}
1565 
1566 	prstatus->pr_cutime = ns_to_kernel_old_timeval(p->signal->cutime);
1567 	prstatus->pr_cstime = ns_to_kernel_old_timeval(p->signal->cstime);
1568 }
1569 
1570 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1571 		       struct mm_struct *mm)
1572 {
1573 	const struct cred *cred;
1574 	unsigned int i, len;
1575 	unsigned int state;
1576 
1577 	/* first copy the parameters from user space */
1578 	memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1579 
1580 	len = mm->arg_end - mm->arg_start;
1581 	if (len >= ELF_PRARGSZ)
1582 		len = ELF_PRARGSZ-1;
1583 	if (copy_from_user(&psinfo->pr_psargs,
1584 		           (const char __user *)mm->arg_start, len))
1585 		return -EFAULT;
1586 	for(i = 0; i < len; i++)
1587 		if (psinfo->pr_psargs[i] == 0)
1588 			psinfo->pr_psargs[i] = ' ';
1589 	psinfo->pr_psargs[len] = 0;
1590 
1591 	rcu_read_lock();
1592 	psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1593 	rcu_read_unlock();
1594 	psinfo->pr_pid = task_pid_vnr(p);
1595 	psinfo->pr_pgrp = task_pgrp_vnr(p);
1596 	psinfo->pr_sid = task_session_vnr(p);
1597 
1598 	state = READ_ONCE(p->__state);
1599 	i = state ? ffz(~state) + 1 : 0;
1600 	psinfo->pr_state = i;
1601 	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1602 	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1603 	psinfo->pr_nice = task_nice(p);
1604 	psinfo->pr_flag = p->flags;
1605 	rcu_read_lock();
1606 	cred = __task_cred(p);
1607 	SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
1608 	SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
1609 	rcu_read_unlock();
1610 	get_task_comm(psinfo->pr_fname, p);
1611 
1612 	return 0;
1613 }
1614 
1615 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1616 {
1617 	elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1618 	int i = 0;
1619 	do
1620 		i += 2;
1621 	while (auxv[i - 2] != AT_NULL);
1622 	fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1623 }
1624 
1625 static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1626 		const kernel_siginfo_t *siginfo)
1627 {
1628 	copy_siginfo_to_external(csigdata, siginfo);
1629 	fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
1630 }
1631 
1632 #define MAX_FILE_NOTE_SIZE (4*1024*1024)
1633 /*
1634  * Format of NT_FILE note:
1635  *
1636  * long count     -- how many files are mapped
1637  * long page_size -- units for file_ofs
1638  * array of [COUNT] elements of
1639  *   long start
1640  *   long end
1641  *   long file_ofs
1642  * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
1643  */
1644 static int fill_files_note(struct memelfnote *note, struct coredump_params *cprm)
1645 {
1646 	unsigned count, size, names_ofs, remaining, n;
1647 	user_long_t *data;
1648 	user_long_t *start_end_ofs;
1649 	char *name_base, *name_curpos;
1650 	int i;
1651 
1652 	/* *Estimated* file count and total data size needed */
1653 	count = cprm->vma_count;
1654 	if (count > UINT_MAX / 64)
1655 		return -EINVAL;
1656 	size = count * 64;
1657 
1658 	names_ofs = (2 + 3 * count) * sizeof(data[0]);
1659  alloc:
1660 	if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1661 		return -EINVAL;
1662 	size = round_up(size, PAGE_SIZE);
1663 	/*
1664 	 * "size" can be 0 here legitimately.
1665 	 * Let it ENOMEM and omit NT_FILE section which will be empty anyway.
1666 	 */
1667 	data = kvmalloc(size, GFP_KERNEL);
1668 	if (ZERO_OR_NULL_PTR(data))
1669 		return -ENOMEM;
1670 
1671 	start_end_ofs = data + 2;
1672 	name_base = name_curpos = ((char *)data) + names_ofs;
1673 	remaining = size - names_ofs;
1674 	count = 0;
1675 	for (i = 0; i < cprm->vma_count; i++) {
1676 		struct core_vma_metadata *m = &cprm->vma_meta[i];
1677 		struct file *file;
1678 		const char *filename;
1679 
1680 		file = m->file;
1681 		if (!file)
1682 			continue;
1683 		filename = file_path(file, name_curpos, remaining);
1684 		if (IS_ERR(filename)) {
1685 			if (PTR_ERR(filename) == -ENAMETOOLONG) {
1686 				kvfree(data);
1687 				size = size * 5 / 4;
1688 				goto alloc;
1689 			}
1690 			continue;
1691 		}
1692 
1693 		/* file_path() fills at the end, move name down */
1694 		/* n = strlen(filename) + 1: */
1695 		n = (name_curpos + remaining) - filename;
1696 		remaining = filename - name_curpos;
1697 		memmove(name_curpos, filename, n);
1698 		name_curpos += n;
1699 
1700 		*start_end_ofs++ = m->start;
1701 		*start_end_ofs++ = m->end;
1702 		*start_end_ofs++ = m->pgoff;
1703 		count++;
1704 	}
1705 
1706 	/* Now we know exact count of files, can store it */
1707 	data[0] = count;
1708 	data[1] = PAGE_SIZE;
1709 	/*
1710 	 * Count usually is less than mm->map_count,
1711 	 * we need to move filenames down.
1712 	 */
1713 	n = cprm->vma_count - count;
1714 	if (n != 0) {
1715 		unsigned shift_bytes = n * 3 * sizeof(data[0]);
1716 		memmove(name_base - shift_bytes, name_base,
1717 			name_curpos - name_base);
1718 		name_curpos -= shift_bytes;
1719 	}
1720 
1721 	size = name_curpos - (char *)data;
1722 	fill_note(note, "CORE", NT_FILE, size, data);
1723 	return 0;
1724 }
1725 
1726 #ifdef CORE_DUMP_USE_REGSET
1727 #include <linux/regset.h>
1728 
1729 struct elf_thread_core_info {
1730 	struct elf_thread_core_info *next;
1731 	struct task_struct *task;
1732 	struct elf_prstatus prstatus;
1733 	struct memelfnote notes[];
1734 };
1735 
1736 struct elf_note_info {
1737 	struct elf_thread_core_info *thread;
1738 	struct memelfnote psinfo;
1739 	struct memelfnote signote;
1740 	struct memelfnote auxv;
1741 	struct memelfnote files;
1742 	user_siginfo_t csigdata;
1743 	size_t size;
1744 	int thread_notes;
1745 };
1746 
1747 /*
1748  * When a regset has a writeback hook, we call it on each thread before
1749  * dumping user memory.  On register window machines, this makes sure the
1750  * user memory backing the register data is up to date before we read it.
1751  */
1752 static void do_thread_regset_writeback(struct task_struct *task,
1753 				       const struct user_regset *regset)
1754 {
1755 	if (regset->writeback)
1756 		regset->writeback(task, regset, 1);
1757 }
1758 
1759 #ifndef PRSTATUS_SIZE
1760 #define PRSTATUS_SIZE sizeof(struct elf_prstatus)
1761 #endif
1762 
1763 #ifndef SET_PR_FPVALID
1764 #define SET_PR_FPVALID(S) ((S)->pr_fpvalid = 1)
1765 #endif
1766 
1767 static int fill_thread_core_info(struct elf_thread_core_info *t,
1768 				 const struct user_regset_view *view,
1769 				 long signr, struct elf_note_info *info)
1770 {
1771 	unsigned int note_iter, view_iter;
1772 
1773 	/*
1774 	 * NT_PRSTATUS is the one special case, because the regset data
1775 	 * goes into the pr_reg field inside the note contents, rather
1776 	 * than being the whole note contents.  We fill the reset in here.
1777 	 * We assume that regset 0 is NT_PRSTATUS.
1778 	 */
1779 	fill_prstatus(&t->prstatus.common, t->task, signr);
1780 	regset_get(t->task, &view->regsets[0],
1781 		   sizeof(t->prstatus.pr_reg), &t->prstatus.pr_reg);
1782 
1783 	fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1784 		  PRSTATUS_SIZE, &t->prstatus);
1785 	info->size += notesize(&t->notes[0]);
1786 
1787 	do_thread_regset_writeback(t->task, &view->regsets[0]);
1788 
1789 	/*
1790 	 * Each other regset might generate a note too.  For each regset
1791 	 * that has no core_note_type or is inactive, skip it.
1792 	 */
1793 	note_iter = 1;
1794 	for (view_iter = 1; view_iter < view->n; ++view_iter) {
1795 		const struct user_regset *regset = &view->regsets[view_iter];
1796 		int note_type = regset->core_note_type;
1797 		bool is_fpreg = note_type == NT_PRFPREG;
1798 		void *data;
1799 		int ret;
1800 
1801 		do_thread_regset_writeback(t->task, regset);
1802 		if (!note_type) // not for coredumps
1803 			continue;
1804 		if (regset->active && regset->active(t->task, regset) <= 0)
1805 			continue;
1806 
1807 		ret = regset_get_alloc(t->task, regset, ~0U, &data);
1808 		if (ret < 0)
1809 			continue;
1810 
1811 		if (WARN_ON_ONCE(note_iter >= info->thread_notes))
1812 			break;
1813 
1814 		if (is_fpreg)
1815 			SET_PR_FPVALID(&t->prstatus);
1816 
1817 		fill_note(&t->notes[note_iter], is_fpreg ? "CORE" : "LINUX",
1818 			  note_type, ret, data);
1819 
1820 		info->size += notesize(&t->notes[note_iter]);
1821 		note_iter++;
1822 	}
1823 
1824 	return 1;
1825 }
1826 
1827 static int fill_note_info(struct elfhdr *elf, int phdrs,
1828 			  struct elf_note_info *info,
1829 			  struct coredump_params *cprm)
1830 {
1831 	struct task_struct *dump_task = current;
1832 	const struct user_regset_view *view = task_user_regset_view(dump_task);
1833 	struct elf_thread_core_info *t;
1834 	struct elf_prpsinfo *psinfo;
1835 	struct core_thread *ct;
1836 	unsigned int i;
1837 
1838 	info->size = 0;
1839 	info->thread = NULL;
1840 
1841 	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1842 	if (psinfo == NULL) {
1843 		info->psinfo.data = NULL; /* So we don't free this wrongly */
1844 		return 0;
1845 	}
1846 
1847 	fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1848 
1849 	/*
1850 	 * Figure out how many notes we're going to need for each thread.
1851 	 */
1852 	info->thread_notes = 0;
1853 	for (i = 0; i < view->n; ++i)
1854 		if (view->regsets[i].core_note_type != 0)
1855 			++info->thread_notes;
1856 
1857 	/*
1858 	 * Sanity check.  We rely on regset 0 being in NT_PRSTATUS,
1859 	 * since it is our one special case.
1860 	 */
1861 	if (unlikely(info->thread_notes == 0) ||
1862 	    unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1863 		WARN_ON(1);
1864 		return 0;
1865 	}
1866 
1867 	/*
1868 	 * Initialize the ELF file header.
1869 	 */
1870 	fill_elf_header(elf, phdrs,
1871 			view->e_machine, view->e_flags);
1872 
1873 	/*
1874 	 * Allocate a structure for each thread.
1875 	 */
1876 	for (ct = &dump_task->signal->core_state->dumper; ct; ct = ct->next) {
1877 		t = kzalloc(offsetof(struct elf_thread_core_info,
1878 				     notes[info->thread_notes]),
1879 			    GFP_KERNEL);
1880 		if (unlikely(!t))
1881 			return 0;
1882 
1883 		t->task = ct->task;
1884 		if (ct->task == dump_task || !info->thread) {
1885 			t->next = info->thread;
1886 			info->thread = t;
1887 		} else {
1888 			/*
1889 			 * Make sure to keep the original task at
1890 			 * the head of the list.
1891 			 */
1892 			t->next = info->thread->next;
1893 			info->thread->next = t;
1894 		}
1895 	}
1896 
1897 	/*
1898 	 * Now fill in each thread's information.
1899 	 */
1900 	for (t = info->thread; t != NULL; t = t->next)
1901 		if (!fill_thread_core_info(t, view, cprm->siginfo->si_signo, info))
1902 			return 0;
1903 
1904 	/*
1905 	 * Fill in the two process-wide notes.
1906 	 */
1907 	fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1908 	info->size += notesize(&info->psinfo);
1909 
1910 	fill_siginfo_note(&info->signote, &info->csigdata, cprm->siginfo);
1911 	info->size += notesize(&info->signote);
1912 
1913 	fill_auxv_note(&info->auxv, current->mm);
1914 	info->size += notesize(&info->auxv);
1915 
1916 	if (fill_files_note(&info->files, cprm) == 0)
1917 		info->size += notesize(&info->files);
1918 
1919 	return 1;
1920 }
1921 
1922 static size_t get_note_info_size(struct elf_note_info *info)
1923 {
1924 	return info->size;
1925 }
1926 
1927 /*
1928  * Write all the notes for each thread.  When writing the first thread, the
1929  * process-wide notes are interleaved after the first thread-specific note.
1930  */
1931 static int write_note_info(struct elf_note_info *info,
1932 			   struct coredump_params *cprm)
1933 {
1934 	bool first = true;
1935 	struct elf_thread_core_info *t = info->thread;
1936 
1937 	do {
1938 		int i;
1939 
1940 		if (!writenote(&t->notes[0], cprm))
1941 			return 0;
1942 
1943 		if (first && !writenote(&info->psinfo, cprm))
1944 			return 0;
1945 		if (first && !writenote(&info->signote, cprm))
1946 			return 0;
1947 		if (first && !writenote(&info->auxv, cprm))
1948 			return 0;
1949 		if (first && info->files.data &&
1950 				!writenote(&info->files, cprm))
1951 			return 0;
1952 
1953 		for (i = 1; i < info->thread_notes; ++i)
1954 			if (t->notes[i].data &&
1955 			    !writenote(&t->notes[i], cprm))
1956 				return 0;
1957 
1958 		first = false;
1959 		t = t->next;
1960 	} while (t);
1961 
1962 	return 1;
1963 }
1964 
1965 static void free_note_info(struct elf_note_info *info)
1966 {
1967 	struct elf_thread_core_info *threads = info->thread;
1968 	while (threads) {
1969 		unsigned int i;
1970 		struct elf_thread_core_info *t = threads;
1971 		threads = t->next;
1972 		WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1973 		for (i = 1; i < info->thread_notes; ++i)
1974 			kfree(t->notes[i].data);
1975 		kfree(t);
1976 	}
1977 	kfree(info->psinfo.data);
1978 	kvfree(info->files.data);
1979 }
1980 
1981 #else
1982 
1983 /* Here is the structure in which status of each thread is captured. */
1984 struct elf_thread_status
1985 {
1986 	struct list_head list;
1987 	struct elf_prstatus prstatus;	/* NT_PRSTATUS */
1988 	elf_fpregset_t fpu;		/* NT_PRFPREG */
1989 	struct task_struct *thread;
1990 	struct memelfnote notes[3];
1991 	int num_notes;
1992 };
1993 
1994 /*
1995  * In order to add the specific thread information for the elf file format,
1996  * we need to keep a linked list of every threads pr_status and then create
1997  * a single section for them in the final core file.
1998  */
1999 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
2000 {
2001 	int sz = 0;
2002 	struct task_struct *p = t->thread;
2003 	t->num_notes = 0;
2004 
2005 	fill_prstatus(&t->prstatus.common, p, signr);
2006 	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
2007 
2008 	fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
2009 		  &(t->prstatus));
2010 	t->num_notes++;
2011 	sz += notesize(&t->notes[0]);
2012 
2013 	if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
2014 								&t->fpu))) {
2015 		fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
2016 			  &(t->fpu));
2017 		t->num_notes++;
2018 		sz += notesize(&t->notes[1]);
2019 	}
2020 	return sz;
2021 }
2022 
/*
 * Everything collected for the notes segment of a core dump (non-regset
 * variant).  Set up by elf_note_info_init()/fill_note_info() and torn
 * down by free_note_info().
 */
struct elf_note_info {
	struct memelfnote *notes;	/* array of process-wide note descriptors */
	/*
	 * Entry of notes[] filled by fill_files_note(), or NULL when that
	 * call did not succeed; free_note_info() frees its data.
	 */
	struct memelfnote *notes_files;
	struct elf_prstatus *prstatus;	/* NT_PRSTATUS */
	struct elf_prpsinfo *psinfo;	/* NT_PRPSINFO */
	struct list_head thread_list;	/* list of struct elf_thread_status */
	elf_fpregset_t *fpu;		/* FP registers of current (NT_PRFPREG) */
	user_siginfo_t csigdata;	/* storage handed to fill_siginfo_note() */
	int thread_status_size;		/* sum of elf_dump_thread_status() results */
	int numnote;			/* number of entries of notes[] in use */
};
2034 
2035 static int elf_note_info_init(struct elf_note_info *info)
2036 {
2037 	memset(info, 0, sizeof(*info));
2038 	INIT_LIST_HEAD(&info->thread_list);
2039 
2040 	/* Allocate space for ELF notes */
2041 	info->notes = kmalloc_array(8, sizeof(struct memelfnote), GFP_KERNEL);
2042 	if (!info->notes)
2043 		return 0;
2044 	info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
2045 	if (!info->psinfo)
2046 		return 0;
2047 	info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
2048 	if (!info->prstatus)
2049 		return 0;
2050 	info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
2051 	if (!info->fpu)
2052 		return 0;
2053 	return 1;
2054 }
2055 
2056 static int fill_note_info(struct elfhdr *elf, int phdrs,
2057 			  struct elf_note_info *info,
2058 			  struct coredump_params *cprm)
2059 {
2060 	struct core_thread *ct;
2061 	struct elf_thread_status *ets;
2062 
2063 	if (!elf_note_info_init(info))
2064 		return 0;
2065 
2066 	for (ct = current->signal->core_state->dumper.next;
2067 					ct; ct = ct->next) {
2068 		ets = kzalloc(sizeof(*ets), GFP_KERNEL);
2069 		if (!ets)
2070 			return 0;
2071 
2072 		ets->thread = ct->task;
2073 		list_add(&ets->list, &info->thread_list);
2074 	}
2075 
2076 	list_for_each_entry(ets, &info->thread_list, list) {
2077 		int sz;
2078 
2079 		sz = elf_dump_thread_status(cprm->siginfo->si_signo, ets);
2080 		info->thread_status_size += sz;
2081 	}
2082 	/* now collect the dump for the current */
2083 	memset(info->prstatus, 0, sizeof(*info->prstatus));
2084 	fill_prstatus(&info->prstatus->common, current, cprm->siginfo->si_signo);
2085 	elf_core_copy_regs(&info->prstatus->pr_reg, cprm->regs);
2086 
2087 	/* Set up header */
2088 	fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
2089 
2090 	/*
2091 	 * Set up the notes in similar form to SVR4 core dumps made
2092 	 * with info from their /proc.
2093 	 */
2094 
2095 	fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
2096 		  sizeof(*info->prstatus), info->prstatus);
2097 	fill_psinfo(info->psinfo, current->group_leader, current->mm);
2098 	fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
2099 		  sizeof(*info->psinfo), info->psinfo);
2100 
2101 	fill_siginfo_note(info->notes + 2, &info->csigdata, cprm->siginfo);
2102 	fill_auxv_note(info->notes + 3, current->mm);
2103 	info->numnote = 4;
2104 
2105 	if (fill_files_note(info->notes + info->numnote, cprm) == 0) {
2106 		info->notes_files = info->notes + info->numnote;
2107 		info->numnote++;
2108 	}
2109 
2110 	/* Try to dump the FPU. */
2111 	info->prstatus->pr_fpvalid =
2112 		elf_core_copy_task_fpregs(current, cprm->regs, info->fpu);
2113 	if (info->prstatus->pr_fpvalid)
2114 		fill_note(info->notes + info->numnote++,
2115 			  "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
2116 	return 1;
2117 }
2118 
2119 static size_t get_note_info_size(struct elf_note_info *info)
2120 {
2121 	int sz = 0;
2122 	int i;
2123 
2124 	for (i = 0; i < info->numnote; i++)
2125 		sz += notesize(info->notes + i);
2126 
2127 	sz += info->thread_status_size;
2128 
2129 	return sz;
2130 }
2131 
2132 static int write_note_info(struct elf_note_info *info,
2133 			   struct coredump_params *cprm)
2134 {
2135 	struct elf_thread_status *ets;
2136 	int i;
2137 
2138 	for (i = 0; i < info->numnote; i++)
2139 		if (!writenote(info->notes + i, cprm))
2140 			return 0;
2141 
2142 	/* write out the thread status notes section */
2143 	list_for_each_entry(ets, &info->thread_list, list) {
2144 		for (i = 0; i < ets->num_notes; i++)
2145 			if (!writenote(&ets->notes[i], cprm))
2146 				return 0;
2147 	}
2148 
2149 	return 1;
2150 }
2151 
2152 static void free_note_info(struct elf_note_info *info)
2153 {
2154 	while (!list_empty(&info->thread_list)) {
2155 		struct list_head *tmp = info->thread_list.next;
2156 		list_del(tmp);
2157 		kfree(list_entry(tmp, struct elf_thread_status, list));
2158 	}
2159 
2160 	/* Free data possibly allocated by fill_files_note(): */
2161 	if (info->notes_files)
2162 		kvfree(info->notes_files->data);
2163 
2164 	kfree(info->prstatus);
2165 	kfree(info->psinfo);
2166 	kfree(info->notes);
2167 	kfree(info->fpu);
2168 }
2169 
2170 #endif
2171 
2172 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
2173 			     elf_addr_t e_shoff, int segs)
2174 {
2175 	elf->e_shoff = e_shoff;
2176 	elf->e_shentsize = sizeof(*shdr4extnum);
2177 	elf->e_shnum = 1;
2178 	elf->e_shstrndx = SHN_UNDEF;
2179 
2180 	memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2181 
2182 	shdr4extnum->sh_type = SHT_NULL;
2183 	shdr4extnum->sh_size = elf->e_shnum;
2184 	shdr4extnum->sh_link = elf->e_shstrndx;
2185 	shdr4extnum->sh_info = segs;
2186 }
2187 
2188 /*
2189  * Actual dumper
2190  *
2191  * This is a two-pass process; first we find the offsets of the bits,
2192  * and then they are actually written out.  If we run out of core limit
2193  * we just truncate.
2194  */
2195 static int elf_core_dump(struct coredump_params *cprm)
2196 {
2197 	int has_dumped = 0;
2198 	int segs, i;
2199 	struct elfhdr elf;
2200 	loff_t offset = 0, dataoff;
2201 	struct elf_note_info info = { };
2202 	struct elf_phdr *phdr4note = NULL;
2203 	struct elf_shdr *shdr4extnum = NULL;
2204 	Elf_Half e_phnum;
2205 	elf_addr_t e_shoff;
2206 
2207 	/*
2208 	 * The number of segs are recored into ELF header as 16bit value.
2209 	 * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
2210 	 */
2211 	segs = cprm->vma_count + elf_core_extra_phdrs();
2212 
2213 	/* for notes section */
2214 	segs++;
2215 
2216 	/* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
2217 	 * this, kernel supports extended numbering. Have a look at
2218 	 * include/linux/elf.h for further information. */
2219 	e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2220 
2221 	/*
2222 	 * Collect all the non-memory information about the process for the
2223 	 * notes.  This also sets up the file header.
2224 	 */
2225 	if (!fill_note_info(&elf, e_phnum, &info, cprm))
2226 		goto end_coredump;
2227 
2228 	has_dumped = 1;
2229 
2230 	offset += sizeof(elf);				/* Elf header */
2231 	offset += segs * sizeof(struct elf_phdr);	/* Program headers */
2232 
2233 	/* Write notes phdr entry */
2234 	{
2235 		size_t sz = get_note_info_size(&info);
2236 
2237 		/* For cell spufs */
2238 		sz += elf_coredump_extra_notes_size();
2239 
2240 		phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2241 		if (!phdr4note)
2242 			goto end_coredump;
2243 
2244 		fill_elf_note_phdr(phdr4note, sz, offset);
2245 		offset += sz;
2246 	}
2247 
2248 	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2249 
2250 	offset += cprm->vma_data_size;
2251 	offset += elf_core_extra_data_size();
2252 	e_shoff = offset;
2253 
2254 	if (e_phnum == PN_XNUM) {
2255 		shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2256 		if (!shdr4extnum)
2257 			goto end_coredump;
2258 		fill_extnum_info(&elf, shdr4extnum, e_shoff, segs);
2259 	}
2260 
2261 	offset = dataoff;
2262 
2263 	if (!dump_emit(cprm, &elf, sizeof(elf)))
2264 		goto end_coredump;
2265 
2266 	if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
2267 		goto end_coredump;
2268 
2269 	/* Write program headers for segments dump */
2270 	for (i = 0; i < cprm->vma_count; i++) {
2271 		struct core_vma_metadata *meta = cprm->vma_meta + i;
2272 		struct elf_phdr phdr;
2273 
2274 		phdr.p_type = PT_LOAD;
2275 		phdr.p_offset = offset;
2276 		phdr.p_vaddr = meta->start;
2277 		phdr.p_paddr = 0;
2278 		phdr.p_filesz = meta->dump_size;
2279 		phdr.p_memsz = meta->end - meta->start;
2280 		offset += phdr.p_filesz;
2281 		phdr.p_flags = 0;
2282 		if (meta->flags & VM_READ)
2283 			phdr.p_flags |= PF_R;
2284 		if (meta->flags & VM_WRITE)
2285 			phdr.p_flags |= PF_W;
2286 		if (meta->flags & VM_EXEC)
2287 			phdr.p_flags |= PF_X;
2288 		phdr.p_align = ELF_EXEC_PAGESIZE;
2289 
2290 		if (!dump_emit(cprm, &phdr, sizeof(phdr)))
2291 			goto end_coredump;
2292 	}
2293 
2294 	if (!elf_core_write_extra_phdrs(cprm, offset))
2295 		goto end_coredump;
2296 
2297  	/* write out the notes section */
2298 	if (!write_note_info(&info, cprm))
2299 		goto end_coredump;
2300 
2301 	/* For cell spufs */
2302 	if (elf_coredump_extra_notes_write(cprm))
2303 		goto end_coredump;
2304 
2305 	/* Align to page */
2306 	dump_skip_to(cprm, dataoff);
2307 
2308 	for (i = 0; i < cprm->vma_count; i++) {
2309 		struct core_vma_metadata *meta = cprm->vma_meta + i;
2310 
2311 		if (!dump_user_range(cprm, meta->start, meta->dump_size))
2312 			goto end_coredump;
2313 	}
2314 
2315 	if (!elf_core_write_extra_data(cprm))
2316 		goto end_coredump;
2317 
2318 	if (e_phnum == PN_XNUM) {
2319 		if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
2320 			goto end_coredump;
2321 	}
2322 
2323 end_coredump:
2324 	free_note_info(&info);
2325 	kfree(shdr4extnum);
2326 	kfree(phdr4note);
2327 	return has_dumped;
2328 }
2329 
2330 #endif		/* CONFIG_ELF_CORE */
2331 
/*
 * Init hook: hand elf_format to register_binfmt().  Always returns 0.
 */
static int __init init_elf_binfmt(void)
{
	register_binfmt(&elf_format);
	return 0;
}
2337 
/*
 * Exit hook: undo init_elf_binfmt() by unregistering elf_format.
 */
static void __exit exit_elf_binfmt(void)
{
	/* Remove the ELF loader. */
	unregister_binfmt(&elf_format);
}
2343 
/*
 * init_elf_binfmt() is hooked up through core_initcall() and
 * exit_elf_binfmt() through module_exit().
 */
core_initcall(init_elf_binfmt);
module_exit(exit_elf_binfmt);
MODULE_LICENSE("GPL");
2347 
2348 #ifdef CONFIG_BINFMT_ELF_KUNIT_TEST
2349 #include "binfmt_elf_test.c"
2350 #endif
2351