xref: /linux/fs/binfmt_elf.c (revision 2c739ced5886cd8c8361faa79a9522ec05174ed0)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * linux/fs/binfmt_elf.c
4  *
5  * These are the functions used to load ELF format executables as used
6  * on SVr4 machines.  Information on the format may be found in the book
7  * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
8  * Tools".
9  *
10  * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
11  */
12 
13 #include <linux/module.h>
14 #include <linux/kernel.h>
15 #include <linux/fs.h>
16 #include <linux/log2.h>
17 #include <linux/mm.h>
18 #include <linux/mman.h>
19 #include <linux/errno.h>
20 #include <linux/signal.h>
21 #include <linux/binfmts.h>
22 #include <linux/string.h>
23 #include <linux/file.h>
24 #include <linux/slab.h>
25 #include <linux/personality.h>
26 #include <linux/elfcore.h>
27 #include <linux/init.h>
28 #include <linux/highuid.h>
29 #include <linux/compiler.h>
30 #include <linux/highmem.h>
31 #include <linux/hugetlb.h>
32 #include <linux/pagemap.h>
33 #include <linux/vmalloc.h>
34 #include <linux/security.h>
35 #include <linux/random.h>
36 #include <linux/elf.h>
37 #include <linux/elf-randomize.h>
38 #include <linux/utsname.h>
39 #include <linux/coredump.h>
40 #include <linux/sched.h>
41 #include <linux/sched/coredump.h>
42 #include <linux/sched/task_stack.h>
43 #include <linux/sched/cputime.h>
44 #include <linux/sizes.h>
45 #include <linux/types.h>
46 #include <linux/cred.h>
47 #include <linux/dax.h>
48 #include <linux/uaccess.h>
49 #include <asm/param.h>
50 #include <asm/page.h>
51 
52 #ifndef ELF_COMPAT
53 #define ELF_COMPAT 0
54 #endif
55 
56 #ifndef user_long_t
57 #define user_long_t long
58 #endif
59 #ifndef user_siginfo_t
60 #define user_siginfo_t siginfo_t
61 #endif
62 
63 /* That's for binfmt_elf_fdpic to deal with */
64 #ifndef elf_check_fdpic
65 #define elf_check_fdpic(ex) false
66 #endif
67 
68 static int load_elf_binary(struct linux_binprm *bprm);
69 
70 #ifdef CONFIG_USELIB
71 static int load_elf_library(struct file *);
72 #else
73 #define load_elf_library NULL
74 #endif
75 
76 /*
77  * If we don't support core dumping, then supply a NULL so we
78  * don't even try.
79  */
80 #ifdef CONFIG_ELF_CORE
81 static int elf_core_dump(struct coredump_params *cprm);
82 #else
83 #define elf_core_dump	NULL
84 #endif
85 
86 #if ELF_EXEC_PAGESIZE > PAGE_SIZE
87 #define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
88 #else
89 #define ELF_MIN_ALIGN	PAGE_SIZE
90 #endif
91 
92 #ifndef ELF_CORE_EFLAGS
93 #define ELF_CORE_EFLAGS	0
94 #endif
95 
96 #define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
97 #define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
98 #define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
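/*
 * Illustrative examples (not from the original file), assuming
 * ELF_MIN_ALIGN == 4096 (0x1000):
 *   ELF_PAGESTART(0x1234)  == 0x1000   (round down to page start)
 *   ELF_PAGEOFFSET(0x1234) == 0x234    (offset within the page)
 *   ELF_PAGEALIGN(0x1234)  == 0x2000   (round up to the next page)
 */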
99 
100 static struct linux_binfmt elf_format = {
101 	.module		= THIS_MODULE,
102 	.load_binary	= load_elf_binary,
103 	.load_shlib	= load_elf_library,
104 	.core_dump	= elf_core_dump,
105 	.min_coredump	= ELF_EXEC_PAGESIZE,
106 };
107 
108 #define BAD_ADDR(x) (unlikely((unsigned long)(x) >= TASK_SIZE))
109 
110 static int set_brk(unsigned long start, unsigned long end, int prot)
111 {
112 	start = ELF_PAGEALIGN(start);
113 	end = ELF_PAGEALIGN(end);
114 	if (end > start) {
115 		/*
116 		 * Map the last of the bss segment.
117 		 * If the header is requesting these pages to be
118 		 * executable, honour that (ppc32 needs this).
119 		 */
120 		int error = vm_brk_flags(start, end - start,
121 				prot & PROT_EXEC ? VM_EXEC : 0);
122 		if (error)
123 			return error;
124 	}
125 	current->mm->start_brk = current->mm->brk = end;
126 	return 0;
127 }
128 
129 /* We need to explicitly zero any fractional pages
130    after the data section (i.e. the bss).  These
131    pages would otherwise contain junk from the file
132    that should not be in memory.
133  */
134 static int padzero(unsigned long elf_bss)
135 {
136 	unsigned long nbyte;
137 
138 	nbyte = ELF_PAGEOFFSET(elf_bss);
139 	if (nbyte) {
140 		nbyte = ELF_MIN_ALIGN - nbyte;
141 		if (clear_user((void __user *) elf_bss, nbyte))
142 			return -EFAULT;
143 	}
144 	return 0;
145 }
146 
147 /* Let's use some macros to make this stack manipulation a little clearer */
148 #ifdef CONFIG_STACK_GROWSUP
149 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
150 #define STACK_ROUND(sp, items) \
151 	((15 + (unsigned long) ((sp) + (items))) & ~15UL)
152 #define STACK_ALLOC(sp, len) ({ \
153 	elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
154 	old_sp; })
155 #else
156 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
157 #define STACK_ROUND(sp, items) \
158 	(((unsigned long) ((sp) - (items))) & ~15UL)
159 #define STACK_ALLOC(sp, len) ({ sp -= len; sp; })
160 #endif
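/*
 * In other words (illustrative summary): on the usual downward-growing
 * stack, STACK_ALLOC() moves sp down by len and returns the new (lower)
 * address, while the CONFIG_STACK_GROWSUP variant returns the old sp
 * and bumps it past the allocation.  STACK_ROUND() keeps the final sp
 * 16-byte aligned.
 */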
161 
162 #ifndef ELF_BASE_PLATFORM
163 /*
164  * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
165  * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
166  * will be copied to the user stack in the same manner as AT_PLATFORM.
167  */
168 #define ELF_BASE_PLATFORM NULL
169 #endif
170 
171 static int
172 create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec,
173 		unsigned long load_addr, unsigned long interp_load_addr,
174 		unsigned long e_entry)
175 {
176 	struct mm_struct *mm = current->mm;
177 	unsigned long p = bprm->p;
178 	int argc = bprm->argc;
179 	int envc = bprm->envc;
180 	elf_addr_t __user *sp;
181 	elf_addr_t __user *u_platform;
182 	elf_addr_t __user *u_base_platform;
183 	elf_addr_t __user *u_rand_bytes;
184 	const char *k_platform = ELF_PLATFORM;
185 	const char *k_base_platform = ELF_BASE_PLATFORM;
186 	unsigned char k_rand_bytes[16];
187 	int items;
188 	elf_addr_t *elf_info;
189 	int ei_index;
190 	const struct cred *cred = current_cred();
191 	struct vm_area_struct *vma;
192 
193 	/*
194 	 * In some cases (e.g. Hyper-Threading), we want to avoid L1
195 	 * evictions by the processes running on the same package. One
196 	 * thing we can do is to shuffle the initial stack for them.
197 	 */
198 
199 	p = arch_align_stack(p);
200 
201 	/*
202 	 * If this architecture has a platform capability string, copy it
203 	 * to userspace.  In some cases (Sparc), this info is impossible
204 	 * for userspace to get any other way, in others (i386) it is
205 	 * merely difficult.
206 	 */
207 	u_platform = NULL;
208 	if (k_platform) {
209 		size_t len = strlen(k_platform) + 1;
210 
211 		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
212 		if (copy_to_user(u_platform, k_platform, len))
213 			return -EFAULT;
214 	}
215 
216 	/*
217 	 * If this architecture has a "base" platform capability
218 	 * string, copy it to userspace.
219 	 */
220 	u_base_platform = NULL;
221 	if (k_base_platform) {
222 		size_t len = strlen(k_base_platform) + 1;
223 
224 		u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
225 		if (copy_to_user(u_base_platform, k_base_platform, len))
226 			return -EFAULT;
227 	}
228 
229 	/*
230 	 * Generate 16 random bytes for userspace PRNG seeding.
231 	 */
232 	get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
233 	u_rand_bytes = (elf_addr_t __user *)
234 		       STACK_ALLOC(p, sizeof(k_rand_bytes));
235 	if (copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
236 		return -EFAULT;
237 
238 	/* Create the ELF interpreter info */
239 	elf_info = (elf_addr_t *)mm->saved_auxv;
240 	/* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
241 #define NEW_AUX_ENT(id, val) \
242 	do { \
243 		*elf_info++ = id; \
244 		*elf_info++ = val; \
245 	} while (0)
246 
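	/*
	 * The vector built below is a flat array of (id, value) pairs
	 * terminated by an AT_NULL pair, e.g. (values illustrative only):
	 *   AT_PHDR, 0x400040, AT_PHENT, 56, AT_PHNUM, 9, ..., AT_NULL, 0
	 */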
247 #ifdef ARCH_DLINFO
248 	/*
249 	 * ARCH_DLINFO must come first so PPC can do its special alignment of
250 	 * AUXV.
251 	 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
252 	 * ARCH_DLINFO changes
253 	 */
254 	ARCH_DLINFO;
255 #endif
256 	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
257 	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
258 	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
259 	NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
260 	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
261 	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
262 	NEW_AUX_ENT(AT_BASE, interp_load_addr);
263 	NEW_AUX_ENT(AT_FLAGS, 0);
264 	NEW_AUX_ENT(AT_ENTRY, e_entry);
265 	NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
266 	NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
267 	NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
268 	NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
269 	NEW_AUX_ENT(AT_SECURE, bprm->secureexec);
270 	NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
271 #ifdef ELF_HWCAP2
272 	NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
273 #endif
274 	NEW_AUX_ENT(AT_EXECFN, bprm->exec);
275 	if (k_platform) {
276 		NEW_AUX_ENT(AT_PLATFORM,
277 			    (elf_addr_t)(unsigned long)u_platform);
278 	}
279 	if (k_base_platform) {
280 		NEW_AUX_ENT(AT_BASE_PLATFORM,
281 			    (elf_addr_t)(unsigned long)u_base_platform);
282 	}
283 	if (bprm->have_execfd) {
284 		NEW_AUX_ENT(AT_EXECFD, bprm->execfd);
285 	}
286 #undef NEW_AUX_ENT
287 	/* AT_NULL is zero; clear the rest too */
288 	memset(elf_info, 0, (char *)mm->saved_auxv +
289 			sizeof(mm->saved_auxv) - (char *)elf_info);
290 
291 	/* And advance past the AT_NULL entry.  */
292 	elf_info += 2;
293 
294 	ei_index = elf_info - (elf_addr_t *)mm->saved_auxv;
295 	sp = STACK_ADD(p, ei_index);
296 
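	/*
	 * Reserve one slot for argc itself, argc + 1 slots for the argv
	 * pointers (including the terminating NULL), and envc + 1 slots
	 * for the envp pointers (including its terminating NULL).
	 */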
297 	items = (argc + 1) + (envc + 1) + 1;
298 	bprm->p = STACK_ROUND(sp, items);
299 
300 	/* Point sp at the lowest address on the stack */
301 #ifdef CONFIG_STACK_GROWSUP
302 	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
303 	bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
304 #else
305 	sp = (elf_addr_t __user *)bprm->p;
306 #endif
307 
308 
309 	/*
310 	 * Grow the stack manually; some architectures have a limit on how
311 	 * far ahead a user-space access may be in order to grow the stack.
312 	 */
313 	vma = find_extend_vma(mm, bprm->p);
314 	if (!vma)
315 		return -EFAULT;
316 
317 	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
318 	if (put_user(argc, sp++))
319 		return -EFAULT;
320 
321 	/* Populate list of argv pointers back to argv strings. */
322 	p = mm->arg_end = mm->arg_start;
323 	while (argc-- > 0) {
324 		size_t len;
325 		if (put_user((elf_addr_t)p, sp++))
326 			return -EFAULT;
327 		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
328 		if (!len || len > MAX_ARG_STRLEN)
329 			return -EINVAL;
330 		p += len;
331 	}
332 	if (put_user(0, sp++))
333 		return -EFAULT;
334 	mm->arg_end = p;
335 
336 	/* Populate list of envp pointers back to envp strings. */
337 	mm->env_end = mm->env_start = p;
338 	while (envc-- > 0) {
339 		size_t len;
340 		if (put_user((elf_addr_t)p, sp++))
341 			return -EFAULT;
342 		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
343 		if (!len || len > MAX_ARG_STRLEN)
344 			return -EINVAL;
345 		p += len;
346 	}
347 	if (put_user(0, sp++))
348 		return -EFAULT;
349 	mm->env_end = p;
350 
351 	/* Put the elf_info on the stack in the right place.  */
352 	if (copy_to_user(sp, mm->saved_auxv, ei_index * sizeof(elf_addr_t)))
353 		return -EFAULT;
354 	return 0;
355 }
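/*
 * The resulting initial stack layout, from low to high addresses
 * (illustrative): argc | argv[0..argc-1] | NULL | envp[0..envc-1] |
 * NULL | auxv (id, value) pairs ending in AT_NULL, with the argument,
 * environment, platform and random-byte strings stored above them.
 */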
356 
357 static unsigned long elf_map(struct file *filep, unsigned long addr,
358 		const struct elf_phdr *eppnt, int prot, int type,
359 		unsigned long total_size)
360 {
361 	unsigned long map_addr;
362 	unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
363 	unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
364 	addr = ELF_PAGESTART(addr);
365 	size = ELF_PAGEALIGN(size);
366 
367 	/* mmap() will return -EINVAL if given a zero size, but a
368 	 * segment with zero filesize is perfectly valid */
369 	if (!size)
370 		return addr;
371 
372 	/*
373 	 * total_size is the size of the ELF (interpreter) image.
374 	 * The _first_ mmap needs to know the full size, otherwise
375 	 * randomization might put this image into an overlapping
376 	 * position with the ELF binary image (since size < total_size).
377 	 * So we first map the 'big' image - and unmap the remainder at
378 	 * the end (this unmapping is needed for ELF images with holes).
379 	 */
380 	if (total_size) {
381 		total_size = ELF_PAGEALIGN(total_size);
382 		map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
383 		if (!BAD_ADDR(map_addr))
384 			vm_munmap(map_addr+size, total_size-size);
385 	} else
386 		map_addr = vm_mmap(filep, addr, size, prot, type, off);
387 
388 	if ((type & MAP_FIXED_NOREPLACE) &&
389 	    PTR_ERR((void *)map_addr) == -EEXIST)
390 		pr_info("%d (%s): Uhuuh, elf segment at %px requested but the memory is mapped already\n",
391 			task_pid_nr(current), current->comm, (void *)addr);
392 
393 	return map_addr;
394 }
395 
396 static unsigned long total_mapping_size(const struct elf_phdr *cmds, int nr)
397 {
398 	int i, first_idx = -1, last_idx = -1;
399 
400 	for (i = 0; i < nr; i++) {
401 		if (cmds[i].p_type == PT_LOAD) {
402 			last_idx = i;
403 			if (first_idx == -1)
404 				first_idx = i;
405 		}
406 	}
407 	if (first_idx == -1)
408 		return 0;
409 
410 	return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
411 				ELF_PAGESTART(cmds[first_idx].p_vaddr);
412 }
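/*
 * Illustrative example (not from the original file): for two PT_LOAD
 * entries with p_vaddr 0x0 (p_memsz 0x1000) and p_vaddr 0x200000
 * (p_memsz 0x500), this returns 0x200500 - the span covers the hole
 * between the two segments.
 */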
413 
414 static int elf_read(struct file *file, void *buf, size_t len, loff_t pos)
415 {
416 	ssize_t rv;
417 
418 	rv = kernel_read(file, buf, len, &pos);
419 	if (unlikely(rv != len)) {
420 		return (rv < 0) ? rv : -EIO;
421 	}
422 	return 0;
423 }
424 
425 static unsigned long maximum_alignment(struct elf_phdr *cmds, int nr)
426 {
427 	unsigned long alignment = 0;
428 	int i;
429 
430 	for (i = 0; i < nr; i++) {
431 		if (cmds[i].p_type == PT_LOAD) {
432 			unsigned long p_align = cmds[i].p_align;
433 
434 			/* skip non-power of two alignments as invalid */
435 			if (!is_power_of_2(p_align))
436 				continue;
437 			alignment = max(alignment, p_align);
438 		}
439 	}
440 
441 	/* ensure we align to at least one page */
442 	return ELF_PAGEALIGN(alignment);
443 }
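/*
 * Illustrative example: PT_LOAD p_align values of 0x1000 and 0x200000
 * yield 0x200000; zero and other non-power-of-two values are skipped.
 */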
444 
445 /**
446  * load_elf_phdrs() - load ELF program headers
447  * @elf_ex:   ELF header of the binary whose program headers should be loaded
448  * @elf_file: the opened ELF binary file
449  *
450  * Loads ELF program headers from the binary file elf_file, which has the ELF
451  * header pointed to by elf_ex, into a newly allocated array. The caller is
452  * responsible for freeing the allocated data. Returns an ERR_PTR upon failure.
453  */
454 static struct elf_phdr *load_elf_phdrs(const struct elfhdr *elf_ex,
455 				       struct file *elf_file)
456 {
457 	struct elf_phdr *elf_phdata = NULL;
458 	int retval, err = -1;
459 	unsigned int size;
460 
461 	/*
462 	 * If the size of this structure has changed, then punt, since
463 	 * we will be doing the wrong thing.
464 	 */
465 	if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
466 		goto out;
467 
468 	/* Sanity check the number of program headers... */
469 	/* ...and their total size. */
470 	size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
471 	if (size == 0 || size > 65536 || size > ELF_MIN_ALIGN)
472 		goto out;
473 
474 	elf_phdata = kmalloc(size, GFP_KERNEL);
475 	if (!elf_phdata)
476 		goto out;
477 
478 	/* Read in the program headers */
479 	retval = elf_read(elf_file, elf_phdata, size, elf_ex->e_phoff);
480 	if (retval < 0) {
481 		err = retval;
482 		goto out;
483 	}
484 
485 	/* Success! */
486 	err = 0;
487 out:
488 	if (err) {
489 		kfree(elf_phdata);
490 		elf_phdata = NULL;
491 	}
492 	return elf_phdata;
493 }
494 
495 #ifndef CONFIG_ARCH_BINFMT_ELF_STATE
496 
497 /**
498  * struct arch_elf_state - arch-specific ELF loading state
499  *
500  * This structure is used to preserve architecture specific data during
501  * the loading of an ELF file, throughout the checking of architecture
502  * specific ELF headers & through to the point where the ELF load is
503  * known to be proceeding (ie. SET_PERSONALITY).
504  *
505  * This implementation is a dummy for architectures which require no
506  * specific state.
507  */
508 struct arch_elf_state {
509 };
510 
511 #define INIT_ARCH_ELF_STATE {}
512 
513 /**
514  * arch_elf_pt_proc() - check a PT_LOPROC..PT_HIPROC ELF program header
515  * @ehdr:	The main ELF header
516  * @phdr:	The program header to check
517  * @elf:	The open ELF file
518  * @is_interp:	True if the phdr is from the interpreter of the ELF being
519  *		loaded, else false.
520  * @state:	Architecture-specific state preserved throughout the process
521  *		of loading the ELF.
522  *
523  * Inspects the program header phdr to validate its correctness and/or
524  * suitability for the system. Called once per ELF program header in the
525  * range PT_LOPROC to PT_HIPROC, for both the ELF being loaded and its
526  * interpreter.
527  *
528  * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
529  *         with that return code.
530  */
531 static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
532 				   struct elf_phdr *phdr,
533 				   struct file *elf, bool is_interp,
534 				   struct arch_elf_state *state)
535 {
536 	/* Dummy implementation, always proceed */
537 	return 0;
538 }
539 
540 /**
541  * arch_check_elf() - check an ELF executable
542  * @ehdr:	The main ELF header
543  * @has_interp:	True if the ELF has an interpreter, else false.
544  * @interp_ehdr: The interpreter's ELF header
545  * @state:	Architecture-specific state preserved throughout the process
546  *		of loading the ELF.
547  *
548  * Provides a final opportunity for architecture code to reject the loading
549  * of the ELF & cause an exec syscall to return an error. This is called
550  * after all program headers have been checked by arch_elf_pt_proc().
551  *
552  * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
553  *         with that return code.
554  */
555 static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
556 				 struct elfhdr *interp_ehdr,
557 				 struct arch_elf_state *state)
558 {
559 	/* Dummy implementation, always proceed */
560 	return 0;
561 }
562 
563 #endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */
564 
565 static inline int make_prot(u32 p_flags, struct arch_elf_state *arch_state,
566 			    bool has_interp, bool is_interp)
567 {
568 	int prot = 0;
569 
570 	if (p_flags & PF_R)
571 		prot |= PROT_READ;
572 	if (p_flags & PF_W)
573 		prot |= PROT_WRITE;
574 	if (p_flags & PF_X)
575 		prot |= PROT_EXEC;
576 
577 	return arch_elf_adjust_prot(prot, arch_state, has_interp, is_interp);
578 }
579 
580 /* This is much more generalized than the library routine read function,
581    so we keep this separate.  Technically the library read function
582    is only provided so that we can read a.out libraries that have
583    an ELF header. */
584 
585 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
586 		struct file *interpreter,
587 		unsigned long no_base, struct elf_phdr *interp_elf_phdata,
588 		struct arch_elf_state *arch_state)
589 {
590 	struct elf_phdr *eppnt;
591 	unsigned long load_addr = 0;
592 	int load_addr_set = 0;
593 	unsigned long last_bss = 0, elf_bss = 0;
594 	int bss_prot = 0;
595 	unsigned long error = ~0UL;
596 	unsigned long total_size;
597 	int i;
598 
599 	/* First of all, some simple consistency checks */
600 	if (interp_elf_ex->e_type != ET_EXEC &&
601 	    interp_elf_ex->e_type != ET_DYN)
602 		goto out;
603 	if (!elf_check_arch(interp_elf_ex) ||
604 	    elf_check_fdpic(interp_elf_ex))
605 		goto out;
606 	if (!interpreter->f_op->mmap)
607 		goto out;
608 
609 	total_size = total_mapping_size(interp_elf_phdata,
610 					interp_elf_ex->e_phnum);
611 	if (!total_size) {
612 		error = -EINVAL;
613 		goto out;
614 	}
615 
616 	eppnt = interp_elf_phdata;
617 	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
618 		if (eppnt->p_type == PT_LOAD) {
619 			int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
620 			int elf_prot = make_prot(eppnt->p_flags, arch_state,
621 						 true, true);
622 			unsigned long vaddr = 0;
623 			unsigned long k, map_addr;
624 
625 			vaddr = eppnt->p_vaddr;
626 			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
627 				elf_type |= MAP_FIXED_NOREPLACE;
628 			else if (no_base && interp_elf_ex->e_type == ET_DYN)
629 				load_addr = -vaddr;
630 
631 			map_addr = elf_map(interpreter, load_addr + vaddr,
632 					eppnt, elf_prot, elf_type, total_size);
633 			total_size = 0;
634 			error = map_addr;
635 			if (BAD_ADDR(map_addr))
636 				goto out;
637 
638 			if (!load_addr_set &&
639 			    interp_elf_ex->e_type == ET_DYN) {
640 				load_addr = map_addr - ELF_PAGESTART(vaddr);
641 				load_addr_set = 1;
642 			}
643 
644 			/*
645 			 * Check to see if the segment's size will overflow the
646 			 * allowed task size. Note that p_filesz must always be
647 			 * <= p_memsz so it's only necessary to check p_memsz.
648 			 */
649 			k = load_addr + eppnt->p_vaddr;
650 			if (BAD_ADDR(k) ||
651 			    eppnt->p_filesz > eppnt->p_memsz ||
652 			    eppnt->p_memsz > TASK_SIZE ||
653 			    TASK_SIZE - eppnt->p_memsz < k) {
654 				error = -ENOMEM;
655 				goto out;
656 			}
657 
658 			/*
659 			 * Find the end of the file mapping for this phdr, and
660 			 * keep track of the largest address we see for this.
661 			 */
662 			k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
663 			if (k > elf_bss)
664 				elf_bss = k;
665 
666 			/*
667 			 * Do the same thing for the memory mapping - between
668 			 * elf_bss and last_bss is the bss section.
669 			 */
670 			k = load_addr + eppnt->p_vaddr + eppnt->p_memsz;
671 			if (k > last_bss) {
672 				last_bss = k;
673 				bss_prot = elf_prot;
674 			}
675 		}
676 	}
677 
678 	/*
679 	 * Now fill out the bss section: first pad the last page from
680 	 * the file up to the page boundary, and zero it from elf_bss
681 	 * up to the end of the page.
682 	 */
683 	if (padzero(elf_bss)) {
684 		error = -EFAULT;
685 		goto out;
686 	}
687 	/*
688 	 * Next, align both the file and mem bss up to the page size,
689 	 * since this is where elf_bss was just zeroed up to, and where
690 	 * last_bss will end after the vm_brk_flags() below.
691 	 */
692 	elf_bss = ELF_PAGEALIGN(elf_bss);
693 	last_bss = ELF_PAGEALIGN(last_bss);
694 	/* Finally, if there is still more bss to allocate, do it. */
695 	if (last_bss > elf_bss) {
696 		error = vm_brk_flags(elf_bss, last_bss - elf_bss,
697 				bss_prot & PROT_EXEC ? VM_EXEC : 0);
698 		if (error)
699 			goto out;
700 	}
701 
702 	error = load_addr;
703 out:
704 	return error;
705 }
706 
707 /*
708  * These are the functions used to load ELF style executables and shared
709  * libraries.  There is no binary dependent code anywhere else.
710  */
711 
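/*
 * The layout parsed below (illustrative sketch): each property is a
 * struct gnu_property { u32 pr_type; u32 pr_datasz; } followed by
 * pr_datasz bytes of data, padded up to ELF_GNU_PROPERTY_ALIGN before
 * the next property begins.
 */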
712 static int parse_elf_property(const char *data, size_t *off, size_t datasz,
713 			      struct arch_elf_state *arch,
714 			      bool have_prev_type, u32 *prev_type)
715 {
716 	size_t o, step;
717 	const struct gnu_property *pr;
718 	int ret;
719 
720 	if (*off == datasz)
721 		return -ENOENT;
722 
723 	if (WARN_ON_ONCE(*off > datasz || *off % ELF_GNU_PROPERTY_ALIGN))
724 		return -EIO;
725 	o = *off;
726 	datasz -= *off;
727 
728 	if (datasz < sizeof(*pr))
729 		return -ENOEXEC;
730 	pr = (const struct gnu_property *)(data + o);
731 	o += sizeof(*pr);
732 	datasz -= sizeof(*pr);
733 
734 	if (pr->pr_datasz > datasz)
735 		return -ENOEXEC;
736 
737 	WARN_ON_ONCE(o % ELF_GNU_PROPERTY_ALIGN);
738 	step = round_up(pr->pr_datasz, ELF_GNU_PROPERTY_ALIGN);
739 	if (step > datasz)
740 		return -ENOEXEC;
741 
742 	/* Properties are supposed to be unique and sorted on pr_type: */
743 	if (have_prev_type && pr->pr_type <= *prev_type)
744 		return -ENOEXEC;
745 	*prev_type = pr->pr_type;
746 
747 	ret = arch_parse_elf_property(pr->pr_type, data + o,
748 				      pr->pr_datasz, ELF_COMPAT, arch);
749 	if (ret)
750 		return ret;
751 
752 	*off = o + step;
753 	return 0;
754 }
755 
756 #define NOTE_DATA_SZ SZ_1K
757 #define GNU_PROPERTY_TYPE_0_NAME "GNU"
758 #define NOTE_NAME_SZ (sizeof(GNU_PROPERTY_TYPE_0_NAME))
759 
760 static int parse_elf_properties(struct file *f, const struct elf_phdr *phdr,
761 				struct arch_elf_state *arch)
762 {
763 	union {
764 		struct elf_note nhdr;
765 		char data[NOTE_DATA_SZ];
766 	} note;
767 	loff_t pos;
768 	ssize_t n;
769 	size_t off, datasz;
770 	int ret;
771 	bool have_prev_type;
772 	u32 prev_type;
773 
774 	if (!IS_ENABLED(CONFIG_ARCH_USE_GNU_PROPERTY) || !phdr)
775 		return 0;
776 
777 	/* load_elf_binary() shouldn't call us unless this is true... */
778 	if (WARN_ON_ONCE(phdr->p_type != PT_GNU_PROPERTY))
779 		return -ENOEXEC;
780 
781 	/* If the properties are crazy large, that's too bad (for now): */
782 	if (phdr->p_filesz > sizeof(note))
783 		return -ENOEXEC;
784 
785 	pos = phdr->p_offset;
786 	n = kernel_read(f, &note, phdr->p_filesz, &pos);
787 
788 	BUILD_BUG_ON(sizeof(note) < sizeof(note.nhdr) + NOTE_NAME_SZ);
789 	if (n < 0 || n < sizeof(note.nhdr) + NOTE_NAME_SZ)
790 		return -EIO;
791 
792 	if (note.nhdr.n_type != NT_GNU_PROPERTY_TYPE_0 ||
793 	    note.nhdr.n_namesz != NOTE_NAME_SZ ||
794 	    strncmp(note.data + sizeof(note.nhdr),
795 		    GNU_PROPERTY_TYPE_0_NAME, n - sizeof(note.nhdr)))
796 		return -ENOEXEC;
797 
798 	off = round_up(sizeof(note.nhdr) + NOTE_NAME_SZ,
799 		       ELF_GNU_PROPERTY_ALIGN);
800 	if (off > n)
801 		return -ENOEXEC;
802 
803 	if (note.nhdr.n_descsz > n - off)
804 		return -ENOEXEC;
805 	datasz = off + note.nhdr.n_descsz;
806 
807 	have_prev_type = false;
808 	do {
809 		ret = parse_elf_property(note.data, &off, datasz, arch,
810 					 have_prev_type, &prev_type);
811 		have_prev_type = true;
812 	} while (!ret);
813 
814 	return ret == -ENOENT ? 0 : ret;
815 }
816 
817 static int load_elf_binary(struct linux_binprm *bprm)
818 {
819 	struct file *interpreter = NULL; /* to shut gcc up */
820 	unsigned long load_addr = 0, load_bias = 0;
821 	int load_addr_set = 0;
822 	unsigned long error;
823 	struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
824 	struct elf_phdr *elf_property_phdata = NULL;
825 	unsigned long elf_bss, elf_brk;
826 	int bss_prot = 0;
827 	int retval, i;
828 	unsigned long elf_entry;
829 	unsigned long e_entry;
830 	unsigned long interp_load_addr = 0;
831 	unsigned long start_code, end_code, start_data, end_data;
832 	unsigned long reloc_func_desc __maybe_unused = 0;
833 	int executable_stack = EXSTACK_DEFAULT;
834 	struct elfhdr *elf_ex = (struct elfhdr *)bprm->buf;
835 	struct elfhdr *interp_elf_ex = NULL;
836 	struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
837 	struct mm_struct *mm;
838 	struct pt_regs *regs;
839 
840 	retval = -ENOEXEC;
841 	/* First of all, some simple consistency checks */
842 	if (memcmp(elf_ex->e_ident, ELFMAG, SELFMAG) != 0)
843 		goto out;
844 
845 	if (elf_ex->e_type != ET_EXEC && elf_ex->e_type != ET_DYN)
846 		goto out;
847 	if (!elf_check_arch(elf_ex))
848 		goto out;
849 	if (elf_check_fdpic(elf_ex))
850 		goto out;
851 	if (!bprm->file->f_op->mmap)
852 		goto out;
853 
854 	elf_phdata = load_elf_phdrs(elf_ex, bprm->file);
855 	if (!elf_phdata)
856 		goto out;
857 
858 	elf_ppnt = elf_phdata;
859 	for (i = 0; i < elf_ex->e_phnum; i++, elf_ppnt++) {
860 		char *elf_interpreter;
861 
862 		if (elf_ppnt->p_type == PT_GNU_PROPERTY) {
863 			elf_property_phdata = elf_ppnt;
864 			continue;
865 		}
866 
867 		if (elf_ppnt->p_type != PT_INTERP)
868 			continue;
869 
870 		/*
871 		 * This is the program interpreter used for shared libraries and
872 		 * dynamically linked executables; p_filesz includes the trailing NUL.
873 		 */
874 		retval = -ENOEXEC;
875 		if (elf_ppnt->p_filesz > PATH_MAX || elf_ppnt->p_filesz < 2)
876 			goto out_free_ph;
877 
878 		retval = -ENOMEM;
879 		elf_interpreter = kmalloc(elf_ppnt->p_filesz, GFP_KERNEL);
880 		if (!elf_interpreter)
881 			goto out_free_ph;
882 
883 		retval = elf_read(bprm->file, elf_interpreter, elf_ppnt->p_filesz,
884 				  elf_ppnt->p_offset);
885 		if (retval < 0)
886 			goto out_free_interp;
887 		/* make sure the path is NUL-terminated */
888 		retval = -ENOEXEC;
889 		if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
890 			goto out_free_interp;
891 
892 		interpreter = open_exec(elf_interpreter);
893 		kfree(elf_interpreter);
894 		retval = PTR_ERR(interpreter);
895 		if (IS_ERR(interpreter))
896 			goto out_free_ph;
897 
898 		/*
899 		 * If the binary is not readable then enforce mm->dumpable = 0
900 		 * regardless of the interpreter's permissions.
901 		 */
902 		would_dump(bprm, interpreter);
903 
904 		interp_elf_ex = kmalloc(sizeof(*interp_elf_ex), GFP_KERNEL);
905 		if (!interp_elf_ex) {
906 			retval = -ENOMEM;
907 			goto out_free_ph;
908 		}
909 
910 		/* Get the exec headers */
911 		retval = elf_read(interpreter, interp_elf_ex,
912 				  sizeof(*interp_elf_ex), 0);
913 		if (retval < 0)
914 			goto out_free_dentry;
915 
916 		break;
917 
918 out_free_interp:
919 		kfree(elf_interpreter);
920 		goto out_free_ph;
921 	}
922 
923 	elf_ppnt = elf_phdata;
924 	for (i = 0; i < elf_ex->e_phnum; i++, elf_ppnt++)
925 		switch (elf_ppnt->p_type) {
926 		case PT_GNU_STACK:
927 			if (elf_ppnt->p_flags & PF_X)
928 				executable_stack = EXSTACK_ENABLE_X;
929 			else
930 				executable_stack = EXSTACK_DISABLE_X;
931 			break;
932 
933 		case PT_LOPROC ... PT_HIPROC:
934 			retval = arch_elf_pt_proc(elf_ex, elf_ppnt,
935 						  bprm->file, false,
936 						  &arch_state);
937 			if (retval)
938 				goto out_free_dentry;
939 			break;
940 		}
941 
942 	/* Some simple consistency checks for the interpreter */
943 	if (interpreter) {
944 		retval = -ELIBBAD;
945 		/* Not an ELF interpreter */
946 		if (memcmp(interp_elf_ex->e_ident, ELFMAG, SELFMAG) != 0)
947 			goto out_free_dentry;
948 		/* Verify the interpreter has a valid arch */
949 		if (!elf_check_arch(interp_elf_ex) ||
950 		    elf_check_fdpic(interp_elf_ex))
951 			goto out_free_dentry;
952 
953 		/* Load the interpreter program headers */
954 		interp_elf_phdata = load_elf_phdrs(interp_elf_ex,
955 						   interpreter);
956 		if (!interp_elf_phdata)
957 			goto out_free_dentry;
958 
959 		/* Pass PT_LOPROC..PT_HIPROC headers to arch code */
960 		elf_property_phdata = NULL;
961 		elf_ppnt = interp_elf_phdata;
962 		for (i = 0; i < interp_elf_ex->e_phnum; i++, elf_ppnt++)
963 			switch (elf_ppnt->p_type) {
964 			case PT_GNU_PROPERTY:
965 				elf_property_phdata = elf_ppnt;
966 				break;
967 
968 			case PT_LOPROC ... PT_HIPROC:
969 				retval = arch_elf_pt_proc(interp_elf_ex,
970 							  elf_ppnt, interpreter,
971 							  true, &arch_state);
972 				if (retval)
973 					goto out_free_dentry;
974 				break;
975 			}
976 	}
977 
978 	retval = parse_elf_properties(interpreter ?: bprm->file,
979 				      elf_property_phdata, &arch_state);
980 	if (retval)
981 		goto out_free_dentry;
982 
983 	/*
984 	 * Allow arch code to reject the ELF at this point, whilst it's
985 	 * still possible to return an error to the code that invoked
986 	 * the exec syscall.
987 	 */
988 	retval = arch_check_elf(elf_ex,
989 				!!interpreter, interp_elf_ex,
990 				&arch_state);
991 	if (retval)
992 		goto out_free_dentry;
993 
994 	/* Flush all traces of the currently running executable */
995 	retval = begin_new_exec(bprm);
996 	if (retval)
997 		goto out_free_dentry;
998 
999 	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
1000 	   may depend on the personality.  */
1001 	SET_PERSONALITY2(*elf_ex, &arch_state);
1002 	if (elf_read_implies_exec(*elf_ex, executable_stack))
1003 		current->personality |= READ_IMPLIES_EXEC;
1004 
1005 	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
1006 		current->flags |= PF_RANDOMIZE;
1007 
1008 	setup_new_exec(bprm);
1009 
1010 	/* Do this so that we can load the interpreter, if need be.  We will
1011 	   change some of these later */
1012 	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
1013 				 executable_stack);
1014 	if (retval < 0)
1015 		goto out_free_dentry;
1016 
1017 	elf_bss = 0;
1018 	elf_brk = 0;
1019 
1020 	start_code = ~0UL;
1021 	end_code = 0;
1022 	start_data = 0;
1023 	end_data = 0;
1024 
1025 	/* Now we do a little grungy work by mmapping the ELF image into
1026 	   the correct location in memory. */
1027 	for (i = 0, elf_ppnt = elf_phdata;
1028 	    i < elf_ex->e_phnum; i++, elf_ppnt++) {
1029 		int elf_prot, elf_flags;
1030 		unsigned long k, vaddr;
1031 		unsigned long total_size = 0;
1032 		unsigned long alignment;
1033 
1034 		if (elf_ppnt->p_type != PT_LOAD)
1035 			continue;
1036 
1037 		if (unlikely(elf_brk > elf_bss)) {
1038 			unsigned long nbyte;
1039 
1040 			/* There was a PT_LOAD segment with p_memsz > p_filesz
1041 			   before this one. Map anonymous pages, if needed,
1042 			   and clear the area.  */
1043 			retval = set_brk(elf_bss + load_bias,
1044 					 elf_brk + load_bias,
1045 					 bss_prot);
1046 			if (retval)
1047 				goto out_free_dentry;
1048 			nbyte = ELF_PAGEOFFSET(elf_bss);
1049 			if (nbyte) {
1050 				nbyte = ELF_MIN_ALIGN - nbyte;
1051 				if (nbyte > elf_brk - elf_bss)
1052 					nbyte = elf_brk - elf_bss;
1053 				if (clear_user((void __user *)elf_bss +
1054 							load_bias, nbyte)) {
1055 					/*
1056 					 * This bss-zeroing can fail if the ELF
1057 					 * file specifies odd protections. So
1058 					 * we don't check the return value.
1059 					 */
1060 				}
1061 			}
1062 		}
1063 
1064 		elf_prot = make_prot(elf_ppnt->p_flags, &arch_state,
1065 				     !!interpreter, false);
1066 
1067 		elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
1068 
1069 		vaddr = elf_ppnt->p_vaddr;
1070 		/*
1071 		 * If we are loading ET_EXEC or we have already performed
1072 		 * the ET_DYN load_addr calculations, proceed normally.
1073 		 */
1074 		if (elf_ex->e_type == ET_EXEC || load_addr_set) {
1075 			elf_flags |= MAP_FIXED;
1076 		} else if (elf_ex->e_type == ET_DYN) {
1077 			/*
1078 			 * This logic is run once for the first LOAD Program
1079 			 * Header for ET_DYN binaries to calculate the
1080 			 * randomization (load_bias) for all the LOAD
1081 			 * Program Headers, and to calculate the entire
1082 			 * size of the ELF mapping (total_size). (Note that
1083 			 * load_addr_set is set to true later once the
1084 			 * initial mapping is performed.)
1085 			 *
1086 			 * There are effectively two types of ET_DYN
1087 			 * binaries: programs (i.e. PIE: ET_DYN with INTERP)
1088 			 * and loaders (ET_DYN without INTERP, since they
1089 			 * _are_ the ELF interpreter). The loaders must
1090 			 * be loaded away from programs since the program
1091 			 * may otherwise collide with the loader (especially
1092 			 * for ET_EXEC which does not have a randomized
1093 			 * position). For example to handle invocations of
1094 			 * "./ld.so someprog" to test out a new version of
1095 			 * the loader, the subsequent program that the
1096 			 * loader loads must avoid the loader itself, so
1097 			 * they cannot share the same load range. Sufficient
1098 			 * room for the brk must be allocated with the
1099 			 * loader as well, since brk must be available with
1100 			 * the loader.
1101 			 *
1102 			 * Therefore, programs are loaded offset from
1103 			 * ELF_ET_DYN_BASE and loaders are loaded into the
1104 			 * independently randomized mmap region (0 load_bias
1105 			 * without MAP_FIXED).
1106 			 */
1107 			if (interpreter) {
1108 				load_bias = ELF_ET_DYN_BASE;
1109 				if (current->flags & PF_RANDOMIZE)
1110 					load_bias += arch_mmap_rnd();
1111 				alignment = maximum_alignment(elf_phdata, elf_ex->e_phnum);
1112 				if (alignment)
1113 					load_bias &= ~(alignment - 1);
1114 				elf_flags |= MAP_FIXED;
1115 			} else
1116 				load_bias = 0;
1117 
1118 			/*
1119 			 * Since load_bias is used for all subsequent loading
1120 			 * calculations, we must lower it by the first vaddr
1121 			 * so that the remaining calculations based on the
1122 			 * ELF vaddrs will be correctly offset. The result
1123 			 * is then page aligned.
1124 			 */
1125 			load_bias = ELF_PAGESTART(load_bias - vaddr);
1126 
1127 			total_size = total_mapping_size(elf_phdata,
1128 							elf_ex->e_phnum);
1129 			if (!total_size) {
1130 				retval = -EINVAL;
1131 				goto out_free_dentry;
1132 			}
1133 		}
1134 
1135 		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
1136 				elf_prot, elf_flags, total_size);
1137 		if (BAD_ADDR(error)) {
1138 			retval = IS_ERR((void *)error) ?
1139 				PTR_ERR((void *)error) : -EINVAL;
1140 			goto out_free_dentry;
1141 		}
1142 
1143 		if (!load_addr_set) {
1144 			load_addr_set = 1;
1145 			load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
1146 			if (elf_ex->e_type == ET_DYN) {
1147 				load_bias += error -
1148 				             ELF_PAGESTART(load_bias + vaddr);
1149 				load_addr += load_bias;
1150 				reloc_func_desc = load_bias;
1151 			}
1152 		}
1153 		k = elf_ppnt->p_vaddr;
1154 		if ((elf_ppnt->p_flags & PF_X) && k < start_code)
1155 			start_code = k;
1156 		if (start_data < k)
1157 			start_data = k;
1158 
1159 		/*
1160 		 * Check to see if the segment's size will overflow the
1161 		 * allowed task size. Note that p_filesz must always be
1162 		 * <= p_memsz so it is only necessary to check p_memsz.
1163 		 */
1164 		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
1165 		    elf_ppnt->p_memsz > TASK_SIZE ||
1166 		    TASK_SIZE - elf_ppnt->p_memsz < k) {
1167 			/* set_brk can never work. Avoid overflows. */
1168 			retval = -EINVAL;
1169 			goto out_free_dentry;
1170 		}
1171 
1172 		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
1173 
1174 		if (k > elf_bss)
1175 			elf_bss = k;
1176 		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
1177 			end_code = k;
1178 		if (end_data < k)
1179 			end_data = k;
1180 		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
1181 		if (k > elf_brk) {
1182 			bss_prot = elf_prot;
1183 			elf_brk = k;
1184 		}
1185 	}
1186 
1187 	e_entry = elf_ex->e_entry + load_bias;
1188 	elf_bss += load_bias;
1189 	elf_brk += load_bias;
1190 	start_code += load_bias;
1191 	end_code += load_bias;
1192 	start_data += load_bias;
1193 	end_data += load_bias;
1194 
1195 	/* Calling set_brk effectively mmaps the pages that we need
1196 	 * for the bss and break sections.  We must do this before
1197 	 * mapping in the interpreter, to make sure it doesn't wind
1198 	 * up getting placed where the bss needs to go.
1199 	 */
1200 	retval = set_brk(elf_bss, elf_brk, bss_prot);
1201 	if (retval)
1202 		goto out_free_dentry;
1203 	if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
1204 		retval = -EFAULT; /* Nobody gets to see this, but.. */
1205 		goto out_free_dentry;
1206 	}
1207 
1208 	if (interpreter) {
1209 		elf_entry = load_elf_interp(interp_elf_ex,
1210 					    interpreter,
1211 					    load_bias, interp_elf_phdata,
1212 					    &arch_state);
1213 		if (!IS_ERR((void *)elf_entry)) {
1214 			/*
1215 			 * load_elf_interp() returns relocation
1216 			 * adjustment
1217 			 */
1218 			interp_load_addr = elf_entry;
1219 			elf_entry += interp_elf_ex->e_entry;
1220 		}
1221 		if (BAD_ADDR(elf_entry)) {
1222 			retval = IS_ERR((void *)elf_entry) ?
1223 					(int)elf_entry : -EINVAL;
1224 			goto out_free_dentry;
1225 		}
1226 		reloc_func_desc = interp_load_addr;
1227 
1228 		allow_write_access(interpreter);
1229 		fput(interpreter);
1230 
1231 		kfree(interp_elf_ex);
1232 		kfree(interp_elf_phdata);
1233 	} else {
1234 		elf_entry = e_entry;
1235 		if (BAD_ADDR(elf_entry)) {
1236 			retval = -EINVAL;
1237 			goto out_free_dentry;
1238 		}
1239 	}
1240 
1241 	kfree(elf_phdata);
1242 
1243 	set_binfmt(&elf_format);
1244 
1245 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
1246 	retval = arch_setup_additional_pages(bprm, !!interpreter);
1247 	if (retval < 0)
1248 		goto out;
1249 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
1250 
1251 	retval = create_elf_tables(bprm, elf_ex,
1252 			  load_addr, interp_load_addr, e_entry);
1253 	if (retval < 0)
1254 		goto out;
1255 
1256 	mm = current->mm;
1257 	mm->end_code = end_code;
1258 	mm->start_code = start_code;
1259 	mm->start_data = start_data;
1260 	mm->end_data = end_data;
1261 	mm->start_stack = bprm->p;
1262 
1263 	if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
1264 		/*
1265 		 * For architectures with ELF randomization, when executing
1266 		 * a loader directly (i.e. no interpreter listed in ELF
1267 		 * headers), move the brk area out of the mmap region
1268 		 * (since it grows up, and may collide early with the stack
1269 		 * growing down), and into the unused ELF_ET_DYN_BASE region.
1270 		 */
1271 		if (IS_ENABLED(CONFIG_ARCH_HAS_ELF_RANDOMIZE) &&
1272 		    elf_ex->e_type == ET_DYN && !interpreter) {
1273 			mm->brk = mm->start_brk = ELF_ET_DYN_BASE;
1274 		}
1275 
1276 		mm->brk = mm->start_brk = arch_randomize_brk(mm);
1277 #ifdef compat_brk_randomized
1278 		current->brk_randomized = 1;
1279 #endif
1280 	}
1281 
1282 	if (current->personality & MMAP_PAGE_ZERO) {
1283 		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
1284 		   and some applications "depend" upon this behavior.
1285 		   Since we do not have the power to recompile these, we
1286 		   emulate the SVr4 behavior. Sigh. */
1287 		error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
1288 				MAP_FIXED | MAP_PRIVATE, 0);
1289 	}
1290 
1291 	regs = current_pt_regs();
1292 #ifdef ELF_PLAT_INIT
1293 	/*
1294 	 * The ABI may specify that certain registers be set up in special
1295 	 * ways (on i386 %edx is the address of a DT_FINI function, for
1296 	 * example).  In addition, it may also specify (e.g. PowerPC64 ELF)
1297 	 * that the e_entry field is the address of the function descriptor
1298 	 * for the startup routine, rather than the address of the startup
1299 	 * routine itself.  This macro performs whatever initialization to
1300 	 * the regs structure is required as well as any relocations to the
1301 	 * function descriptor entries when executing dynamically linked apps.
1302 	 */
1303 	ELF_PLAT_INIT(regs, reloc_func_desc);
1304 #endif
1305 
1306 	finalize_exec(bprm);
1307 	start_thread(regs, elf_entry, bprm->p);
1308 	retval = 0;
1309 out:
1310 	return retval;
1311 
1312 	/* error cleanup */
1313 out_free_dentry:
1314 	kfree(interp_elf_ex);
1315 	kfree(interp_elf_phdata);
1316 	allow_write_access(interpreter);
1317 	if (interpreter)
1318 		fput(interpreter);
1319 out_free_ph:
1320 	kfree(elf_phdata);
1321 	goto out;
1322 }
1323 
1324 #ifdef CONFIG_USELIB
1325 /* This is really simpleminded and specialized - we are loading an
1326    ELF library via the legacy a.out uselib() interface. */
1327 static int load_elf_library(struct file *file)
1328 {
1329 	struct elf_phdr *elf_phdata;
1330 	struct elf_phdr *eppnt;
1331 	unsigned long elf_bss, bss, len;
1332 	int retval, error, i, j;
1333 	struct elfhdr elf_ex;
1334 
1335 	error = -ENOEXEC;
1336 	retval = elf_read(file, &elf_ex, sizeof(elf_ex), 0);
1337 	if (retval < 0)
1338 		goto out;
1339 
1340 	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1341 		goto out;
1342 
1343 	/* First of all, some simple consistency checks */
1344 	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1345 	    !elf_check_arch(&elf_ex) || !file->f_op->mmap)
1346 		goto out;
1347 	if (elf_check_fdpic(&elf_ex))
1348 		goto out;
1349 
1350 	/* Now read in all of the header information */
1351 
1352 	j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1353 	/* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1354 
1355 	error = -ENOMEM;
1356 	elf_phdata = kmalloc(j, GFP_KERNEL);
1357 	if (!elf_phdata)
1358 		goto out;
1359 
1360 	eppnt = elf_phdata;
1361 	error = -ENOEXEC;
1362 	retval = elf_read(file, eppnt, j, elf_ex.e_phoff);
1363 	if (retval < 0)
1364 		goto out_free_ph;
1365 
1366 	for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
1367 		if ((eppnt + i)->p_type == PT_LOAD)
1368 			j++;
1369 	if (j != 1)
1370 		goto out_free_ph;
1371 
1372 	while (eppnt->p_type != PT_LOAD)
1373 		eppnt++;
1374 
1375 	/* Now use mmap to map the library into memory. */
1376 	error = vm_mmap(file,
1377 			ELF_PAGESTART(eppnt->p_vaddr),
1378 			(eppnt->p_filesz +
1379 			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
1380 			PROT_READ | PROT_WRITE | PROT_EXEC,
1381 			MAP_FIXED_NOREPLACE | MAP_PRIVATE | MAP_DENYWRITE,
1382 			(eppnt->p_offset -
1383 			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
1384 	if (error != ELF_PAGESTART(eppnt->p_vaddr))
1385 		goto out_free_ph;
1386 
1387 	elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1388 	if (padzero(elf_bss)) {
1389 		error = -EFAULT;
1390 		goto out_free_ph;
1391 	}
1392 
1393 	len = ELF_PAGEALIGN(eppnt->p_filesz + eppnt->p_vaddr);
1394 	bss = ELF_PAGEALIGN(eppnt->p_memsz + eppnt->p_vaddr);
1395 	if (bss > len) {
1396 		error = vm_brk(len, bss - len);
1397 		if (error)
1398 			goto out_free_ph;
1399 	}
1400 	error = 0;
1401 
1402 out_free_ph:
1403 	kfree(elf_phdata);
1404 out:
1405 	return error;
1406 }
1407 #endif /* #ifdef CONFIG_USELIB */
1408 
1409 #ifdef CONFIG_ELF_CORE
1410 /*
1411  * ELF core dumper
1412  *
1413  * Modelled on fs/exec.c:aout_core_dump()
1414  * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1415  */
1416 
1417 /* An ELF note in memory */
1418 struct memelfnote
1419 {
1420 	const char *name;
1421 	int type;
1422 	unsigned int datasz;
1423 	void *data;
1424 };
1425 
1426 static int notesize(struct memelfnote *en)
1427 {
1428 	int sz;
1429 
1430 	sz = sizeof(struct elf_note);
1431 	sz += roundup(strlen(en->name) + 1, 4);
1432 	sz += roundup(en->datasz, 4);
1433 
1434 	return sz;
1435 }
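/*
 * For example (illustrative), a "CORE"/NT_PRSTATUS note occupies
 * 12 bytes of struct elf_note header, 8 bytes for "CORE\0" padded
 * to 4, plus roundup(sizeof(struct elf_prstatus), 4) bytes of data.
 */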
1436 
1437 static int writenote(struct memelfnote *men, struct coredump_params *cprm)
1438 {
1439 	struct elf_note en;
1440 	en.n_namesz = strlen(men->name) + 1;
1441 	en.n_descsz = men->datasz;
1442 	en.n_type = men->type;
1443 
1444 	return dump_emit(cprm, &en, sizeof(en)) &&
1445 	    dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
1446 	    dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
1447 }
1448 
1449 static void fill_elf_header(struct elfhdr *elf, int segs,
1450 			    u16 machine, u32 flags)
1451 {
1452 	memset(elf, 0, sizeof(*elf));
1453 
1454 	memcpy(elf->e_ident, ELFMAG, SELFMAG);
1455 	elf->e_ident[EI_CLASS] = ELF_CLASS;
1456 	elf->e_ident[EI_DATA] = ELF_DATA;
1457 	elf->e_ident[EI_VERSION] = EV_CURRENT;
1458 	elf->e_ident[EI_OSABI] = ELF_OSABI;
1459 
1460 	elf->e_type = ET_CORE;
1461 	elf->e_machine = machine;
1462 	elf->e_version = EV_CURRENT;
1463 	elf->e_phoff = sizeof(struct elfhdr);
1464 	elf->e_flags = flags;
1465 	elf->e_ehsize = sizeof(struct elfhdr);
1466 	elf->e_phentsize = sizeof(struct elf_phdr);
1467 	elf->e_phnum = segs;
1468 }
1469 
1470 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1471 {
1472 	phdr->p_type = PT_NOTE;
1473 	phdr->p_offset = offset;
1474 	phdr->p_vaddr = 0;
1475 	phdr->p_paddr = 0;
1476 	phdr->p_filesz = sz;
1477 	phdr->p_memsz = 0;
1478 	phdr->p_flags = 0;
1479 	phdr->p_align = 0;
1480 }
1481 
1482 static void fill_note(struct memelfnote *note, const char *name, int type,
1483 		unsigned int sz, void *data)
1484 {
1485 	note->name = name;
1486 	note->type = type;
1487 	note->datasz = sz;
1488 	note->data = data;
1489 }
1490 
1491 /*
1492  * fill up all the fields in prstatus from the given task struct, except
1493  * registers which need to be filled up separately.
1494  */
1495 static void fill_prstatus(struct elf_prstatus *prstatus,
1496 		struct task_struct *p, long signr)
1497 {
1498 	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1499 	prstatus->pr_sigpend = p->pending.signal.sig[0];
1500 	prstatus->pr_sighold = p->blocked.sig[0];
1501 	rcu_read_lock();
1502 	prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1503 	rcu_read_unlock();
1504 	prstatus->pr_pid = task_pid_vnr(p);
1505 	prstatus->pr_pgrp = task_pgrp_vnr(p);
1506 	prstatus->pr_sid = task_session_vnr(p);
1507 	if (thread_group_leader(p)) {
1508 		struct task_cputime cputime;
1509 
1510 		/*
1511 		 * This is the record for the group leader.  It shows the
1512 		 * group-wide total, not its individual thread total.
1513 		 */
1514 		thread_group_cputime(p, &cputime);
1515 		prstatus->pr_utime = ns_to_kernel_old_timeval(cputime.utime);
1516 		prstatus->pr_stime = ns_to_kernel_old_timeval(cputime.stime);
1517 	} else {
1518 		u64 utime, stime;
1519 
1520 		task_cputime(p, &utime, &stime);
1521 		prstatus->pr_utime = ns_to_kernel_old_timeval(utime);
1522 		prstatus->pr_stime = ns_to_kernel_old_timeval(stime);
1523 	}
1524 
1525 	prstatus->pr_cutime = ns_to_kernel_old_timeval(p->signal->cutime);
1526 	prstatus->pr_cstime = ns_to_kernel_old_timeval(p->signal->cstime);
1527 }
1528 
1529 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1530 		       struct mm_struct *mm)
1531 {
1532 	const struct cred *cred;
1533 	unsigned int i, len;
1534 
1535 	/* first copy the parameters from user space */
1536 	memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1537 
1538 	len = mm->arg_end - mm->arg_start;
1539 	if (len >= ELF_PRARGSZ)
1540 		len = ELF_PRARGSZ - 1;
1541 	if (copy_from_user(&psinfo->pr_psargs,
1542 		           (const char __user *)mm->arg_start, len))
1543 		return -EFAULT;
1544 	for (i = 0; i < len; i++)
1545 		if (psinfo->pr_psargs[i] == 0)
1546 			psinfo->pr_psargs[i] = ' ';
1547 	psinfo->pr_psargs[len] = 0;
1548 
1549 	rcu_read_lock();
1550 	psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1551 	rcu_read_unlock();
1552 	psinfo->pr_pid = task_pid_vnr(p);
1553 	psinfo->pr_pgrp = task_pgrp_vnr(p);
1554 	psinfo->pr_sid = task_session_vnr(p);
1555 
1556 	i = p->state ? ffz(~p->state) + 1 : 0;
1557 	psinfo->pr_state = i;
1558 	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1559 	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1560 	psinfo->pr_nice = task_nice(p);
1561 	psinfo->pr_flag = p->flags;
1562 	rcu_read_lock();
1563 	cred = __task_cred(p);
1564 	SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
1565 	SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
1566 	rcu_read_unlock();
1567 	strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1568 
1569 	return 0;
1570 }
1571 
1572 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1573 {
1574 	elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1575 	int i = 0;
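	/* Count the elf_addr_t entries, including the final AT_NULL pair. */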
1576 	do {
1577 		i += 2;
1578 	} while (auxv[i - 2] != AT_NULL);
1579 	fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1580 }
1581 
1582 static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1583 		const kernel_siginfo_t *siginfo)
1584 {
1585 	copy_siginfo_to_external(csigdata, siginfo);
1586 	fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
1587 }
1588 
1589 #define MAX_FILE_NOTE_SIZE (4*1024*1024)
1590 /*
1591  * Format of NT_FILE note:
1592  *
1593  * long count     -- how many files are mapped
1594  * long page_size -- units for file_ofs
1595  * array of [COUNT] elements of
1596  *   long start
1597  *   long end
1598  *   long file_ofs
1599  * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
1600  */
1601 static int fill_files_note(struct memelfnote *note)
1602 {
1603 	struct mm_struct *mm = current->mm;
1604 	struct vm_area_struct *vma;
1605 	unsigned count, size, names_ofs, remaining, n;
1606 	user_long_t *data;
1607 	user_long_t *start_end_ofs;
1608 	char *name_base, *name_curpos;
1609 
1610 	/* *Estimated* file count and total data size needed */
1611 	count = mm->map_count;
1612 	if (count > UINT_MAX / 64)
1613 		return -EINVAL;
1614 	size = count * 64;
1615 
1616 	names_ofs = (2 + 3 * count) * sizeof(data[0]);
1617  alloc:
1618 	if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1619 		return -EINVAL;
1620 	size = round_up(size, PAGE_SIZE);
1621 	/*
1622 	 * "size" can be 0 here legitimately.
1623 	 * Let it ENOMEM and omit NT_FILE section which will be empty anyway.
1624 	 */
1625 	data = kvmalloc(size, GFP_KERNEL);
1626 	if (ZERO_OR_NULL_PTR(data))
1627 		return -ENOMEM;
1628 
1629 	start_end_ofs = data + 2;
1630 	name_base = name_curpos = ((char *)data) + names_ofs;
1631 	remaining = size - names_ofs;
1632 	count = 0;
1633 	for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
1634 		struct file *file;
1635 		const char *filename;
1636 
1637 		file = vma->vm_file;
1638 		if (!file)
1639 			continue;
1640 		filename = file_path(file, name_curpos, remaining);
1641 		if (IS_ERR(filename)) {
1642 			if (PTR_ERR(filename) == -ENAMETOOLONG) {
1643 				kvfree(data);
1644 				size = size * 5 / 4;
1645 				goto alloc;
1646 			}
1647 			continue;
1648 		}
1649 
1650 		/* file_path() fills at the end, move name down */
1651 		/* n = strlen(filename) + 1: */
1652 		n = (name_curpos + remaining) - filename;
1653 		remaining = filename - name_curpos;
1654 		memmove(name_curpos, filename, n);
1655 		name_curpos += n;
1656 
1657 		*start_end_ofs++ = vma->vm_start;
1658 		*start_end_ofs++ = vma->vm_end;
1659 		*start_end_ofs++ = vma->vm_pgoff;
1660 		count++;
1661 	}
1662 
1663 	/* Now that we know the exact count of files, store it */
1664 	data[0] = count;
1665 	data[1] = PAGE_SIZE;
1666 	/*
1667 	 * The count is usually less than mm->map_count,
1668 	 * so we need to move the filenames down.
1669 	 */
1670 	n = mm->map_count - count;
1671 	if (n != 0) {
1672 		unsigned shift_bytes = n * 3 * sizeof(data[0]);
1673 		memmove(name_base - shift_bytes, name_base,
1674 			name_curpos - name_base);
1675 		name_curpos -= shift_bytes;
1676 	}
1677 
1678 	size = name_curpos - (char *)data;
1679 	fill_note(note, "CORE", NT_FILE, size, data);
1680 	return 0;
1681 }
1682 
1683 #ifdef CORE_DUMP_USE_REGSET
1684 #include <linux/regset.h>
1685 
1686 struct elf_thread_core_info {
1687 	struct elf_thread_core_info *next;
1688 	struct task_struct *task;
1689 	struct elf_prstatus prstatus;
1690 	struct memelfnote notes[];
1691 };
1692 
1693 struct elf_note_info {
1694 	struct elf_thread_core_info *thread;
1695 	struct memelfnote psinfo;
1696 	struct memelfnote signote;
1697 	struct memelfnote auxv;
1698 	struct memelfnote files;
1699 	user_siginfo_t csigdata;
1700 	size_t size;
1701 	int thread_notes;
1702 };
1703 
1704 /*
1705  * When a regset has a writeback hook, we call it on each thread before
1706  * dumping user memory.  On register window machines, this makes sure the
1707  * user memory backing the register data is up to date before we read it.
1708  */
1709 static void do_thread_regset_writeback(struct task_struct *task,
1710 				       const struct user_regset *regset)
1711 {
1712 	if (regset->writeback)
1713 		regset->writeback(task, regset, 1);
1714 }
1715 
1716 #ifndef PRSTATUS_SIZE
1717 #define PRSTATUS_SIZE(S, R) sizeof(S)
1718 #endif
1719 
1720 #ifndef SET_PR_FPVALID
1721 #define SET_PR_FPVALID(S, V, R) ((S)->pr_fpvalid = (V))
1722 #endif
1723 
1724 static int fill_thread_core_info(struct elf_thread_core_info *t,
1725 				 const struct user_regset_view *view,
1726 				 long signr, size_t *total)
1727 {
1728 	unsigned int i;
1729 	int regset0_size;
1730 
1731 	/*
1732 	 * NT_PRSTATUS is the one special case, because the regset data
1733 	 * goes into the pr_reg field inside the note contents, rather
1734 	 * than being the whole note contents.  We fill the rest in here.
1735 	 * We assume that regset 0 is NT_PRSTATUS.
1736 	 */
1737 	fill_prstatus(&t->prstatus, t->task, signr);
1738 	regset0_size = regset_get(t->task, &view->regsets[0],
1739 		   sizeof(t->prstatus.pr_reg), &t->prstatus.pr_reg);
1740 	if (regset0_size < 0)
1741 		return 0;
1742 
1743 	fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1744 		  PRSTATUS_SIZE(t->prstatus, regset0_size), &t->prstatus);
1745 	*total += notesize(&t->notes[0]);
1746 
1747 	do_thread_regset_writeback(t->task, &view->regsets[0]);
1748 
1749 	/*
1750 	 * Each other regset might generate a note too.  For each regset
1751 	 * that has no core_note_type or is inactive, we leave t->notes[i]
1752 	 * all zero and we'll know to skip writing it later.
1753 	 */
1754 	for (i = 1; i < view->n; ++i) {
1755 		const struct user_regset *regset = &view->regsets[i];
1756 		int note_type = regset->core_note_type;
1757 		bool is_fpreg = note_type == NT_PRFPREG;
1758 		void *data;
1759 		int ret;
1760 
1761 		do_thread_regset_writeback(t->task, regset);
1762 		if (!note_type) // not for coredumps
1763 			continue;
1764 		if (regset->active && regset->active(t->task, regset) <= 0)
1765 			continue;
1766 
1767 		ret = regset_get_alloc(t->task, regset, ~0U, &data);
1768 		if (ret < 0)
1769 			continue;
1770 
1771 		if (is_fpreg)
1772 			SET_PR_FPVALID(&t->prstatus, 1, regset0_size);
1773 
1774 		fill_note(&t->notes[i], is_fpreg ? "CORE" : "LINUX",
1775 			  note_type, ret, data);
1776 
1777 		*total += notesize(&t->notes[i]);
1778 	}
1779 
1780 	return 1;
1781 }
1782 
1783 static int fill_note_info(struct elfhdr *elf, int phdrs,
1784 			  struct elf_note_info *info,
1785 			  const kernel_siginfo_t *siginfo, struct pt_regs *regs)
1786 {
1787 	struct task_struct *dump_task = current;
1788 	const struct user_regset_view *view = task_user_regset_view(dump_task);
1789 	struct elf_thread_core_info *t;
1790 	struct elf_prpsinfo *psinfo;
1791 	struct core_thread *ct;
1792 	unsigned int i;
1793 
1794 	info->size = 0;
1795 	info->thread = NULL;
1796 
1797 	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1798 	if (psinfo == NULL) {
1799 		info->psinfo.data = NULL; /* So we don't free this wrongly */
1800 		return 0;
1801 	}
1802 
1803 	fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1804 
1805 	/*
1806 	 * Figure out how many notes we're going to need for each thread.
1807 	 */
1808 	info->thread_notes = 0;
1809 	for (i = 0; i < view->n; ++i)
1810 		if (view->regsets[i].core_note_type != 0)
1811 			++info->thread_notes;
1812 
1813 	/*
1814 	 * Sanity check.  We rely on regset 0 being NT_PRSTATUS,
1815 	 * since it is our one special case.
1816 	 */
1817 	if (unlikely(info->thread_notes == 0) ||
1818 	    unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1819 		WARN_ON(1);
1820 		return 0;
1821 	}
1822 
1823 	/*
1824 	 * Initialize the ELF file header.
1825 	 */
1826 	fill_elf_header(elf, phdrs,
1827 			view->e_machine, view->e_flags);
1828 
1829 	/*
1830 	 * Allocate a structure for each thread.
1831 	 */
1832 	for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1833 		t = kzalloc(offsetof(struct elf_thread_core_info,
1834 				     notes[info->thread_notes]),
1835 			    GFP_KERNEL);
1836 		if (unlikely(!t))
1837 			return 0;
1838 
1839 		t->task = ct->task;
1840 		if (ct->task == dump_task || !info->thread) {
1841 			t->next = info->thread;
1842 			info->thread = t;
1843 		} else {
1844 			/*
1845 			 * Make sure to keep the original task at
1846 			 * the head of the list.
1847 			 */
1848 			t->next = info->thread->next;
1849 			info->thread->next = t;
1850 		}
1851 	}
1852 
1853 	/*
1854 	 * Now fill in each thread's information.
1855 	 */
1856 	for (t = info->thread; t != NULL; t = t->next)
1857 		if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1858 			return 0;
1859 
1860 	/*
1861 	 * Fill in the two process-wide notes.
1862 	 */
1863 	fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1864 	info->size += notesize(&info->psinfo);
1865 
1866 	fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1867 	info->size += notesize(&info->signote);
1868 
1869 	fill_auxv_note(&info->auxv, current->mm);
1870 	info->size += notesize(&info->auxv);
1871 
1872 	if (fill_files_note(&info->files) == 0)
1873 		info->size += notesize(&info->files);
1874 
1875 	return 1;
1876 }
1877 
1878 static size_t get_note_info_size(struct elf_note_info *info)
1879 {
1880 	return info->size;
1881 }
1882 
1883 /*
1884  * Write all the notes for each thread.  When writing the first thread, the
1885  * process-wide notes are interleaved after the first thread-specific note.
1886  */
1887 static int write_note_info(struct elf_note_info *info,
1888 			   struct coredump_params *cprm)
1889 {
1890 	bool first = true;
1891 	struct elf_thread_core_info *t = info->thread;
1892 
1893 	do {
1894 		int i;
1895 
1896 		if (!writenote(&t->notes[0], cprm))
1897 			return 0;
1898 
1899 		if (first && !writenote(&info->psinfo, cprm))
1900 			return 0;
1901 		if (first && !writenote(&info->signote, cprm))
1902 			return 0;
1903 		if (first && !writenote(&info->auxv, cprm))
1904 			return 0;
1905 		if (first && info->files.data &&
1906 				!writenote(&info->files, cprm))
1907 			return 0;
1908 
1909 		for (i = 1; i < info->thread_notes; ++i)
1910 			if (t->notes[i].data &&
1911 			    !writenote(&t->notes[i], cprm))
1912 				return 0;
1913 
1914 		first = false;
1915 		t = t->next;
1916 	} while (t);
1917 
1918 	return 1;
1919 }
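
/*
 * Concretely, for a process with two threads where T0 took the fatal
 * signal, the loop above emits notes in this order (NT_FILE only if
 * fill_files_note() succeeded):
 *
 *	NT_PRSTATUS(T0), NT_PRPSINFO, NT_SIGINFO, NT_AUXV, NT_FILE,
 *	<T0's remaining regset notes>,
 *	NT_PRSTATUS(T1), <T1's remaining regset notes>
 */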
1920 
1921 static void free_note_info(struct elf_note_info *info)
1922 {
1923 	struct elf_thread_core_info *threads = info->thread;
1924 	while (threads) {
1925 		unsigned int i;
1926 		struct elf_thread_core_info *t = threads;
1927 		threads = t->next;
1928 		WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1929 		for (i = 1; i < info->thread_notes; ++i)
1930 			kfree(t->notes[i].data);
1931 		kfree(t);
1932 	}
1933 	kfree(info->psinfo.data);
1934 	kvfree(info->files.data);
1935 }
1936 
1937 #else
1938 
1939 /* Here is the structure in which the status of each thread is captured. */
1940 struct elf_thread_status {
1942 	struct list_head list;
1943 	struct elf_prstatus prstatus;	/* NT_PRSTATUS */
1944 	elf_fpregset_t fpu;		/* NT_PRFPREG */
1945 	struct task_struct *thread;
1946 	struct memelfnote notes[3];
1947 	int num_notes;
1948 };
1949 
1950 /*
1951  * In order to add the specific thread information for the ELF file format,
1952  * we need to keep a linked list of every thread's pr_status and then create
1953  * a single section for them in the final core file.
1954  */
1955 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1956 {
1957 	int sz = 0;
1958 	struct task_struct *p = t->thread;
1959 	t->num_notes = 0;
1960 
1961 	fill_prstatus(&t->prstatus, p, signr);
1962 	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1963 
1964 	fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1965 		  &(t->prstatus));
1966 	t->num_notes++;
1967 	sz += notesize(&t->notes[0]);
1968 
1969 	t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL, &t->fpu);
1970 	if (t->prstatus.pr_fpvalid) {
1971 		fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1972 			  &(t->fpu));
1973 		t->num_notes++;
1974 		sz += notesize(&t->notes[1]);
1975 	}
1976 	return sz;
1977 }
1978 
1979 struct elf_note_info {
1980 	struct memelfnote *notes;
1981 	struct memelfnote *notes_files;
1982 	struct elf_prstatus *prstatus;	/* NT_PRSTATUS */
1983 	struct elf_prpsinfo *psinfo;	/* NT_PRPSINFO */
1984 	struct list_head thread_list;
1985 	elf_fpregset_t *fpu;
1986 	user_siginfo_t csigdata;
1987 	int thread_status_size;
1988 	int numnote;
1989 };
1990 
1991 static int elf_note_info_init(struct elf_note_info *info)
1992 {
1993 	memset(info, 0, sizeof(*info));
1994 	INIT_LIST_HEAD(&info->thread_list);
1995 
1996 	/* Allocate space for ELF notes */
1997 	info->notes = kmalloc_array(8, sizeof(struct memelfnote), GFP_KERNEL);
1998 	if (!info->notes)
1999 		return 0;
2000 	info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
2001 	if (!info->psinfo)
2002 		return 0;
2003 	info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
2004 	if (!info->prstatus)
2005 		return 0;
2006 	info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
2007 	if (!info->fpu)
2008 		return 0;
2009 	return 1;
2010 }
2011 
2012 static int fill_note_info(struct elfhdr *elf, int phdrs,
2013 			  struct elf_note_info *info,
2014 			  const kernel_siginfo_t *siginfo, struct pt_regs *regs)
2015 {
2016 	struct core_thread *ct;
2017 	struct elf_thread_status *ets;
2018 
2019 	if (!elf_note_info_init(info))
2020 		return 0;
2021 
2022 	for (ct = current->mm->core_state->dumper.next;
2023 					ct; ct = ct->next) {
2024 		ets = kzalloc(sizeof(*ets), GFP_KERNEL);
2025 		if (!ets)
2026 			return 0;
2027 
2028 		ets->thread = ct->task;
2029 		list_add(&ets->list, &info->thread_list);
2030 	}
2031 
2032 	list_for_each_entry(ets, &info->thread_list, list) {
2033 		int sz;
2034 
2035 		sz = elf_dump_thread_status(siginfo->si_signo, ets);
2036 		info->thread_status_size += sz;
2037 	}
2038 	/* now collect the dump for the current task */
2039 	memset(info->prstatus, 0, sizeof(*info->prstatus));
2040 	fill_prstatus(info->prstatus, current, siginfo->si_signo);
2041 	elf_core_copy_regs(&info->prstatus->pr_reg, regs);
2042 
2043 	/* Set up header */
2044 	fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
2045 
2046 	/*
2047 	 * Set up the notes in similar form to SVR4 core dumps made
2048 	 * with info from their /proc.
2049 	 */
2050 
2051 	fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
2052 		  sizeof(*info->prstatus), info->prstatus);
2053 	fill_psinfo(info->psinfo, current->group_leader, current->mm);
2054 	fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
2055 		  sizeof(*info->psinfo), info->psinfo);
2056 
2057 	fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
2058 	fill_auxv_note(info->notes + 3, current->mm);
2059 	info->numnote = 4;
2060 
2061 	if (fill_files_note(info->notes + info->numnote) == 0) {
2062 		info->notes_files = info->notes + info->numnote;
2063 		info->numnote++;
2064 	}
2065 
2066 	/* Try to dump the FPU. */
2067 	info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
2068 							       info->fpu);
2069 	if (info->prstatus->pr_fpvalid)
2070 		fill_note(info->notes + info->numnote++,
2071 			  "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
2072 	return 1;
2073 }
2074 
2075 static size_t get_note_info_size(struct elf_note_info *info)
2076 {
2077 	int sz = 0;
2078 	int i;
2079 
2080 	for (i = 0; i < info->numnote; i++)
2081 		sz += notesize(info->notes + i);
2082 
2083 	sz += info->thread_status_size;
2084 
2085 	return sz;
2086 }
2087 
2088 static int write_note_info(struct elf_note_info *info,
2089 			   struct coredump_params *cprm)
2090 {
2091 	struct elf_thread_status *ets;
2092 	int i;
2093 
2094 	for (i = 0; i < info->numnote; i++)
2095 		if (!writenote(info->notes + i, cprm))
2096 			return 0;
2097 
2098 	/* write out the thread status notes section */
2099 	list_for_each_entry(ets, &info->thread_list, list) {
2100 		for (i = 0; i < ets->num_notes; i++)
2101 			if (!writenote(&ets->notes[i], cprm))
2102 				return 0;
2103 	}
2104 
2105 	return 1;
2106 }
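
/*
 * Unlike the regset-based writer above, this path emits all the
 * process-wide notes first and only then each thread's status notes,
 * e.g. (bracketed notes are optional):
 *
 *	NT_PRSTATUS(current), NT_PRPSINFO, NT_SIGINFO, NT_AUXV,
 *	[NT_FILE,] [NT_PRFPREG(current),]
 *	NT_PRSTATUS(T1), [NT_PRFPREG(T1),] ...
 */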
2107 
2108 static void free_note_info(struct elf_note_info *info)
2109 {
2110 	while (!list_empty(&info->thread_list)) {
2111 		struct list_head *tmp = info->thread_list.next;
2112 		list_del(tmp);
2113 		kfree(list_entry(tmp, struct elf_thread_status, list));
2114 	}
2115 
2116 	/* Free data possibly allocated by fill_files_note(): */
2117 	if (info->notes_files)
2118 		kvfree(info->notes_files->data);
2119 
2120 	kfree(info->prstatus);
2121 	kfree(info->psinfo);
2122 	kfree(info->notes);
2123 	kfree(info->fpu);
2124 }
2125 
2126 #endif
2127 
2128 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
2129 			     elf_addr_t e_shoff, int segs)
2130 {
2131 	elf->e_shoff = e_shoff;
2132 	elf->e_shentsize = sizeof(*shdr4extnum);
2133 	elf->e_shnum = 1;
2134 	elf->e_shstrndx = SHN_UNDEF;
2135 
2136 	memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2137 
2138 	shdr4extnum->sh_type = SHT_NULL;
2139 	shdr4extnum->sh_size = elf->e_shnum;
2140 	shdr4extnum->sh_link = elf->e_shstrndx;
2141 	shdr4extnum->sh_info = segs;
2142 }
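
/*
 * Sketch of the consumer side (illustrative only, not kernel code): a
 * core-file reader that sees e_phnum == PN_XNUM recovers the real
 * segment count from the sh_info field of section header 0, which
 * fill_extnum_info() set up above:
 *
 *	unsigned int phnum = ehdr->e_phnum;
 *	if (phnum == PN_XNUM)
 *		phnum = shdr0->sh_info;
 */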
2143 
2144 /*
2145  * Actual dumper
2146  *
2147  * This is a two-pass process; first we find the offsets of the bits,
2148  * and then they are actually written out.  If we run out of core limit
2149  * we just truncate.
2150  */
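/*
 * The first pass only advances "offset"; nothing is written until the
 * second pass.  The resulting file layout is:
 *
 *	[ ELF header ]
 *	[ program headers: the note phdr, one PT_LOAD per vma, extras ]
 *	[ note data ]
 *	[ padding up to an ELF_EXEC_PAGESIZE boundary ]
 *	[ vma contents, in program-header order ]
 *	[ extra data, then the extended-numbering section header if any ]
 */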
2151 static int elf_core_dump(struct coredump_params *cprm)
2152 {
2153 	int has_dumped = 0;
2154 	int vma_count, segs, i;
2155 	size_t vma_data_size;
2156 	struct elfhdr elf;
2157 	loff_t offset = 0, dataoff;
2158 	struct elf_note_info info = { };
2159 	struct elf_phdr *phdr4note = NULL;
2160 	struct elf_shdr *shdr4extnum = NULL;
2161 	Elf_Half e_phnum;
2162 	elf_addr_t e_shoff;
2163 	struct core_vma_metadata *vma_meta;
2164 
2165 	if (dump_vma_snapshot(cprm, &vma_count, &vma_meta, &vma_data_size))
2166 		return 0;
2167 
2168 	/*
2169 	 * The number of segs is recorded in the ELF header as a 16-bit value.
2170 	 * Please check the DEFAULT_MAX_MAP_COUNT definition if you modify this.
2171 	 */
2172 	segs = vma_count + elf_core_extra_phdrs();
2173 
2174 	/* for notes section */
2175 	segs++;
2176 
2177 	/* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
2178 	 * this, the kernel supports extended numbering. Have a look at
2179 	 * include/linux/elf.h for further information. */
2180 	e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2181 
2182 	/*
2183 	 * Collect all the non-memory information about the process for the
2184 	 * notes.  This also sets up the file header.
2185 	 */
2186 	if (!fill_note_info(&elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2187 		goto end_coredump;
2188 
2189 	has_dumped = 1;
2190 
2191 	offset += sizeof(elf);				/* Elf header */
2192 	offset += segs * sizeof(struct elf_phdr);	/* Program headers */
2193 
2194 	/* Write notes phdr entry */
2195 	{
2196 		size_t sz = get_note_info_size(&info);
2197 
2198 		sz += elf_coredump_extra_notes_size();
2199 
2200 		phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2201 		if (!phdr4note)
2202 			goto end_coredump;
2203 
2204 		fill_elf_note_phdr(phdr4note, sz, offset);
2205 		offset += sz;
2206 	}
2207 
2208 	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2209 
2210 	offset += vma_data_size;
2211 	offset += elf_core_extra_data_size();
2212 	e_shoff = offset;
2213 
2214 	if (e_phnum == PN_XNUM) {
2215 		shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2216 		if (!shdr4extnum)
2217 			goto end_coredump;
2218 		fill_extnum_info(&elf, shdr4extnum, e_shoff, segs);
2219 	}
2220 
2221 	offset = dataoff;
2222 
2223 	if (!dump_emit(cprm, &elf, sizeof(elf)))
2224 		goto end_coredump;
2225 
2226 	if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
2227 		goto end_coredump;
2228 
2229 	/* Write program headers for the segment dump */
2230 	for (i = 0; i < vma_count; i++) {
2231 		struct core_vma_metadata *meta = vma_meta + i;
2232 		struct elf_phdr phdr;
2233 
2234 		phdr.p_type = PT_LOAD;
2235 		phdr.p_offset = offset;
2236 		phdr.p_vaddr = meta->start;
2237 		phdr.p_paddr = 0;
2238 		phdr.p_filesz = meta->dump_size;
2239 		phdr.p_memsz = meta->end - meta->start;
2240 		offset += phdr.p_filesz;
2241 		phdr.p_flags = 0;
2242 		if (meta->flags & VM_READ)
2243 			phdr.p_flags |= PF_R;
2244 		if (meta->flags & VM_WRITE)
2245 			phdr.p_flags |= PF_W;
2246 		if (meta->flags & VM_EXEC)
2247 			phdr.p_flags |= PF_X;
2248 		phdr.p_align = ELF_EXEC_PAGESIZE;
2249 
2250 		if (!dump_emit(cprm, &phdr, sizeof(phdr)))
2251 			goto end_coredump;
2252 	}
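
	/*
	 * Example with invented numbers: a 16-page read-write mapping whose
	 * dump was limited to its first page by the vma snapshot gets
	 * p_filesz == 0x1000 but p_memsz == 0x10000 (assuming 4K pages), so
	 * debuggers know the rest of the range existed but was not dumped.
	 */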
2253 
2254 	if (!elf_core_write_extra_phdrs(cprm, offset))
2255 		goto end_coredump;
2256 
2257 	/* write out the notes section */
2258 	if (!write_note_info(&info, cprm))
2259 		goto end_coredump;
2260 
2261 	if (elf_coredump_extra_notes_write(cprm))
2262 		goto end_coredump;
2263 
2264 	/* Align to page */
2265 	if (!dump_skip(cprm, dataoff - cprm->pos))
2266 		goto end_coredump;
2267 
2268 	for (i = 0; i < vma_count; i++) {
2269 		struct core_vma_metadata *meta = vma_meta + i;
2270 
2271 		if (!dump_user_range(cprm, meta->start, meta->dump_size))
2272 			goto end_coredump;
2273 	}
2274 	dump_truncate(cprm);
2275 
2276 	if (!elf_core_write_extra_data(cprm))
2277 		goto end_coredump;
2278 
2279 	if (e_phnum == PN_XNUM) {
2280 		if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
2281 			goto end_coredump;
2282 	}
2283 
2284 end_coredump:
2285 	free_note_info(&info);
2286 	kfree(shdr4extnum);
2287 	kvfree(vma_meta);
2288 	kfree(phdr4note);
2289 	return has_dumped;
2290 }
2291 
2292 #endif		/* CONFIG_ELF_CORE */
2293 
2294 static int __init init_elf_binfmt(void)
2295 {
2296 	register_binfmt(&elf_format);
2297 	return 0;
2298 }
2299 
2300 static void __exit exit_elf_binfmt(void)
2301 {
2302 	/* Remove the ELF loader. */
2303 	unregister_binfmt(&elf_format);
2304 }
2305 
2306 core_initcall(init_elf_binfmt);
2307 module_exit(exit_elf_binfmt);
2308 MODULE_LICENSE("GPL");
2309