xref: /linux/fs/binfmt_elf.c (revision a3a4a816b4b194c45d0217e8b9e08b2639802cda)
1 /*
2  * linux/fs/binfmt_elf.c
3  *
4  * These are the functions used to load ELF format executables as used
5  * on SVr4 machines.  Information on the format may be found in the book
6  * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
7  * Tools".
8  *
9  * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
10  */
11 
12 #include <linux/module.h>
13 #include <linux/kernel.h>
14 #include <linux/fs.h>
15 #include <linux/mm.h>
16 #include <linux/mman.h>
17 #include <linux/errno.h>
18 #include <linux/signal.h>
19 #include <linux/binfmts.h>
20 #include <linux/string.h>
21 #include <linux/file.h>
22 #include <linux/slab.h>
23 #include <linux/personality.h>
24 #include <linux/elfcore.h>
25 #include <linux/init.h>
26 #include <linux/highuid.h>
27 #include <linux/compiler.h>
28 #include <linux/highmem.h>
29 #include <linux/pagemap.h>
30 #include <linux/vmalloc.h>
31 #include <linux/security.h>
32 #include <linux/random.h>
33 #include <linux/elf.h>
34 #include <linux/elf-randomize.h>
35 #include <linux/utsname.h>
36 #include <linux/coredump.h>
37 #include <linux/sched.h>
38 #include <linux/sched/coredump.h>
39 #include <linux/sched/task_stack.h>
40 #include <linux/sched/cputime.h>
41 #include <linux/cred.h>
42 #include <linux/dax.h>
43 #include <linux/uaccess.h>
44 #include <asm/param.h>
45 #include <asm/page.h>
46 
47 #ifndef user_long_t
48 #define user_long_t long
49 #endif
50 #ifndef user_siginfo_t
51 #define user_siginfo_t siginfo_t
52 #endif
53 
54 static int load_elf_binary(struct linux_binprm *bprm);
55 static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
56 				int, int, unsigned long);
57 
58 #ifdef CONFIG_USELIB
59 static int load_elf_library(struct file *);
60 #else
61 #define load_elf_library NULL
62 #endif
63 
64 /*
65  * If we don't support core dumping, then supply a NULL so we
66  * don't even try.
67  */
68 #ifdef CONFIG_ELF_CORE
69 static int elf_core_dump(struct coredump_params *cprm);
70 #else
71 #define elf_core_dump	NULL
72 #endif
73 
74 #if ELF_EXEC_PAGESIZE > PAGE_SIZE
75 #define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
76 #else
77 #define ELF_MIN_ALIGN	PAGE_SIZE
78 #endif
79 
80 #ifndef ELF_CORE_EFLAGS
81 #define ELF_CORE_EFLAGS	0
82 #endif
83 
84 #define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
85 #define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
86 #define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
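/*
 * Illustrative example (annotation, not part of the original source):
 * with ELF_MIN_ALIGN == 4096, a virtual address of 0x08048123 yields
 *   ELF_PAGESTART(0x08048123)  == 0x08048000
 *   ELF_PAGEOFFSET(0x08048123) == 0x123
 *   ELF_PAGEALIGN(0x08048123)  == 0x08049000
 */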
87 
88 static struct linux_binfmt elf_format = {
89 	.module		= THIS_MODULE,
90 	.load_binary	= load_elf_binary,
91 	.load_shlib	= load_elf_library,
92 	.core_dump	= elf_core_dump,
93 	.min_coredump	= ELF_EXEC_PAGESIZE,
94 };
95 
96 #define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
97 
98 static int set_brk(unsigned long start, unsigned long end, int prot)
99 {
100 	start = ELF_PAGEALIGN(start);
101 	end = ELF_PAGEALIGN(end);
102 	if (end > start) {
103 		/*
104 		 * Map the last of the bss segment.
105 		 * If the header is requesting these pages to be
106 		 * executable, honour that (ppc32 needs this).
107 		 */
108 		int error = vm_brk_flags(start, end - start,
109 				prot & PROT_EXEC ? VM_EXEC : 0);
110 		if (error)
111 			return error;
112 	}
113 	current->mm->start_brk = current->mm->brk = end;
114 	return 0;
115 }
116 
117 /* We need to explicitly zero any fractional pages
118    after the data section (i.e. bss).  These would
119    otherwise contain junk from the file that should
120    not be in memory.
121  */
122 static int padzero(unsigned long elf_bss)
123 {
124 	unsigned long nbyte;
125 
126 	nbyte = ELF_PAGEOFFSET(elf_bss);
127 	if (nbyte) {
128 		nbyte = ELF_MIN_ALIGN - nbyte;
129 		if (clear_user((void __user *) elf_bss, nbyte))
130 			return -EFAULT;
131 	}
132 	return 0;
133 }
134 
135 /* Let's use some macros to make this stack manipulation a little clearer */
136 #ifdef CONFIG_STACK_GROWSUP
137 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
138 #define STACK_ROUND(sp, items) \
139 	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
140 #define STACK_ALLOC(sp, len) ({ \
141 	elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
142 	old_sp; })
143 #else
144 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
145 #define STACK_ROUND(sp, items) \
146 	(((unsigned long) (sp - items)) &~ 15UL)
147 #define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
148 #endif
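/*
 * Usage sketch (annotation): on a downward-growing stack,
 * STACK_ALLOC(p, len) moves p down by len bytes and returns the new,
 * lower value, so the caller can __copy_to_user() into the bytes it
 * just reserved; STACK_ADD(sp, items) steps an elf_addr_t pointer
 * down by 'items' slots; STACK_ROUND() then keeps the final stack
 * pointer 16-byte aligned as most ABIs expect.
 */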
149 
150 #ifndef ELF_BASE_PLATFORM
151 /*
152  * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
153  * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
154  * will be copied to the user stack in the same manner as AT_PLATFORM.
155  */
156 #define ELF_BASE_PLATFORM NULL
157 #endif
158 
159 static int
160 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
161 		unsigned long load_addr, unsigned long interp_load_addr)
162 {
163 	unsigned long p = bprm->p;
164 	int argc = bprm->argc;
165 	int envc = bprm->envc;
166 	elf_addr_t __user *argv;
167 	elf_addr_t __user *envp;
168 	elf_addr_t __user *sp;
169 	elf_addr_t __user *u_platform;
170 	elf_addr_t __user *u_base_platform;
171 	elf_addr_t __user *u_rand_bytes;
172 	const char *k_platform = ELF_PLATFORM;
173 	const char *k_base_platform = ELF_BASE_PLATFORM;
174 	unsigned char k_rand_bytes[16];
175 	int items;
176 	elf_addr_t *elf_info;
177 	int ei_index = 0;
178 	const struct cred *cred = current_cred();
179 	struct vm_area_struct *vma;
180 
181 	/*
182 	 * In some cases (e.g. Hyper-Threading), we want to avoid L1
183 	 * evictions by the processes running on the same package. One
184 	 * thing we can do is to shuffle the initial stack for them.
185 	 */
186 
187 	p = arch_align_stack(p);
188 
189 	/*
190 	 * If this architecture has a platform capability string, copy it
191 	 * to userspace.  In some cases (Sparc), this info is impossible
192 	 * for userspace to get any other way, in others (i386) it is
193 	 * merely difficult.
194 	 */
195 	u_platform = NULL;
196 	if (k_platform) {
197 		size_t len = strlen(k_platform) + 1;
198 
199 		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
200 		if (__copy_to_user(u_platform, k_platform, len))
201 			return -EFAULT;
202 	}
203 
204 	/*
205 	 * If this architecture has a "base" platform capability
206 	 * string, copy it to userspace.
207 	 */
208 	u_base_platform = NULL;
209 	if (k_base_platform) {
210 		size_t len = strlen(k_base_platform) + 1;
211 
212 		u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
213 		if (__copy_to_user(u_base_platform, k_base_platform, len))
214 			return -EFAULT;
215 	}
216 
217 	/*
218 	 * Generate 16 random bytes for userspace PRNG seeding.
219 	 */
220 	get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
221 	u_rand_bytes = (elf_addr_t __user *)
222 		       STACK_ALLOC(p, sizeof(k_rand_bytes));
223 	if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
224 		return -EFAULT;
225 
226 	/* Create the ELF interpreter info */
227 	elf_info = (elf_addr_t *)current->mm->saved_auxv;
228 	/* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
229 #define NEW_AUX_ENT(id, val) \
230 	do { \
231 		elf_info[ei_index++] = id; \
232 		elf_info[ei_index++] = val; \
233 	} while (0)
234 
235 #ifdef ARCH_DLINFO
236 	/*
237 	 * ARCH_DLINFO must come first so PPC can do its special alignment of
238 	 * AUXV.
239 	 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
240 	 * ARCH_DLINFO changes
241 	 */
242 	ARCH_DLINFO;
243 #endif
244 	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
245 	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
246 	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
247 	NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
248 	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
249 	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
250 	NEW_AUX_ENT(AT_BASE, interp_load_addr);
251 	NEW_AUX_ENT(AT_FLAGS, 0);
252 	NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
253 	NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
254 	NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
255 	NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
256 	NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
257 	NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
258 	NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
259 #ifdef ELF_HWCAP2
260 	NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
261 #endif
262 	NEW_AUX_ENT(AT_EXECFN, bprm->exec);
263 	if (k_platform) {
264 		NEW_AUX_ENT(AT_PLATFORM,
265 			    (elf_addr_t)(unsigned long)u_platform);
266 	}
267 	if (k_base_platform) {
268 		NEW_AUX_ENT(AT_BASE_PLATFORM,
269 			    (elf_addr_t)(unsigned long)u_base_platform);
270 	}
271 	if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
272 		NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
273 	}
274 #undef NEW_AUX_ENT
275 	/* AT_NULL is zero; clear the rest too */
276 	memset(&elf_info[ei_index], 0,
277 	       sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
278 
279 	/* And advance past the AT_NULL entry.  */
280 	ei_index += 2;
281 
282 	sp = STACK_ADD(p, ei_index);
283 
284 	items = (argc + 1) + (envc + 1) + 1;
285 	bprm->p = STACK_ROUND(sp, items);
286 
287 	/* Point sp at the lowest address on the stack */
288 #ifdef CONFIG_STACK_GROWSUP
289 	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
290 	bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
291 #else
292 	sp = (elf_addr_t __user *)bprm->p;
293 #endif
294 
295 
296 	/*
297 	 * Grow the stack manually; some architectures have a limit on how
298 	 * far ahead a user-space access may be in order to grow the stack.
299 	 */
300 	vma = find_extend_vma(current->mm, bprm->p);
301 	if (!vma)
302 		return -EFAULT;
303 
304 	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
305 	if (__put_user(argc, sp++))
306 		return -EFAULT;
307 	argv = sp;
308 	envp = argv + argc + 1;
309 
310 	/* Populate argv and envp */
311 	p = current->mm->arg_end = current->mm->arg_start;
312 	while (argc-- > 0) {
313 		size_t len;
314 		if (__put_user((elf_addr_t)p, argv++))
315 			return -EFAULT;
316 		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
317 		if (!len || len > MAX_ARG_STRLEN)
318 			return -EINVAL;
319 		p += len;
320 	}
321 	if (__put_user(0, argv))
322 		return -EFAULT;
323 	current->mm->arg_end = current->mm->env_start = p;
324 	while (envc-- > 0) {
325 		size_t len;
326 		if (__put_user((elf_addr_t)p, envp++))
327 			return -EFAULT;
328 		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
329 		if (!len || len > MAX_ARG_STRLEN)
330 			return -EINVAL;
331 		p += len;
332 	}
333 	if (__put_user(0, envp))
334 		return -EFAULT;
335 	current->mm->env_end = p;
336 
337 	/* Put the elf_info on the stack in the right place.  */
338 	sp = (elf_addr_t __user *)envp + 1;
339 	if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
340 		return -EFAULT;
341 	return 0;
342 }
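/*
 * Annotation (not in the original source): the initial stack that
 * create_elf_tables() leaves behind looks roughly like this, from
 * low to high addresses:
 *
 *   argc | argv[0..argc-1] | NULL | envp[0..envc-1] | NULL |
 *   auxv {id,val} pairs ending with AT_NULL | ... padding ... |
 *   AT_RANDOM bytes, platform strings | argument/environment strings
 *
 * The strings themselves were copied near the top of the stack
 * earlier by the generic exec code; this function only fills in the
 * pointers below them.
 */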
343 
344 #ifndef elf_map
345 
346 static unsigned long elf_map(struct file *filep, unsigned long addr,
347 		struct elf_phdr *eppnt, int prot, int type,
348 		unsigned long total_size)
349 {
350 	unsigned long map_addr;
351 	unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
352 	unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
353 	addr = ELF_PAGESTART(addr);
354 	size = ELF_PAGEALIGN(size);
355 
356 	/* mmap() will return -EINVAL if given a zero size, but a
357 	 * segment with zero filesize is perfectly valid */
358 	if (!size)
359 		return addr;
360 
361 	/*
362 	 * total_size is the size of the ELF (interpreter) image.
363 	 * The _first_ mmap needs to know the full size, otherwise
364 	 * randomization might put this image into an overlapping
365 	 * position with the ELF binary image (since size < total_size).
366 	 * So we first map the 'big' image and then unmap the remainder
367 	 * at the end (the unmap is needed for ELF images with holes).
368 	 */
369 	if (total_size) {
370 		total_size = ELF_PAGEALIGN(total_size);
371 		map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
372 		if (!BAD_ADDR(map_addr))
373 			vm_munmap(map_addr+size, total_size-size);
374 	} else
375 		map_addr = vm_mmap(filep, addr, size, prot, type, off);
376 
377 	return map_addr;
378 }
379 
380 #endif /* !elf_map */
381 
382 static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
383 {
384 	int i, first_idx = -1, last_idx = -1;
385 
386 	for (i = 0; i < nr; i++) {
387 		if (cmds[i].p_type == PT_LOAD) {
388 			last_idx = i;
389 			if (first_idx == -1)
390 				first_idx = i;
391 		}
392 	}
393 	if (first_idx == -1)
394 		return 0;
395 
396 	return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
397 				ELF_PAGESTART(cmds[first_idx].p_vaddr);
398 }
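/*
 * Worked example (annotation): for a binary with PT_LOAD segments at
 * p_vaddr 0x400000 (p_memsz 0x1000) and p_vaddr 0x600e10 (p_memsz
 * 0x2f0), total_mapping_size() returns
 * 0x600e10 + 0x2f0 - ELF_PAGESTART(0x400000) = 0x201100.
 */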
399 
400 /**
401  * load_elf_phdrs() - load ELF program headers
402  * @elf_ex:   ELF header of the binary whose program headers should be loaded
403  * @elf_file: the opened ELF binary file
404  *
405  * Loads ELF program headers from the binary file elf_file, which has the ELF
406  * header pointed to by elf_ex, into a newly allocated array. The caller is
407  * responsible for freeing the allocated data. Returns an ERR_PTR upon failure.
408  */
409 static struct elf_phdr *load_elf_phdrs(struct elfhdr *elf_ex,
410 				       struct file *elf_file)
411 {
412 	struct elf_phdr *elf_phdata = NULL;
413 	int retval, size, err = -1;
414 
415 	/*
416 	 * If the size of this structure has changed, then punt, since
417 	 * we will be doing the wrong thing.
418 	 */
419 	if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
420 		goto out;
421 
422 	/* Sanity check the number of program headers... */
423 	if (elf_ex->e_phnum < 1 ||
424 		elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
425 		goto out;
426 
427 	/* ...and their total size. */
428 	size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
429 	if (size > ELF_MIN_ALIGN)
430 		goto out;
431 
432 	elf_phdata = kmalloc(size, GFP_KERNEL);
433 	if (!elf_phdata)
434 		goto out;
435 
436 	/* Read in the program headers */
437 	retval = kernel_read(elf_file, elf_ex->e_phoff,
438 			     (char *)elf_phdata, size);
439 	if (retval != size) {
440 		err = (retval < 0) ? retval : -EIO;
441 		goto out;
442 	}
443 
444 	/* Success! */
445 	err = 0;
446 out:
447 	if (err) {
448 		kfree(elf_phdata);
449 		elf_phdata = NULL;
450 	}
451 	return elf_phdata;
452 }
453 
454 #ifndef CONFIG_ARCH_BINFMT_ELF_STATE
455 
456 /**
457  * struct arch_elf_state - arch-specific ELF loading state
458  *
459  * This structure is used to preserve architecture specific data during
460  * the loading of an ELF file, throughout the checking of architecture
461  * specific ELF headers & through to the point where the ELF load is
462  * known to be proceeding (ie. SET_PERSONALITY).
463  *
464  * This implementation is a dummy for architectures which require no
465  * specific state.
466  */
467 struct arch_elf_state {
468 };
469 
470 #define INIT_ARCH_ELF_STATE {}
471 
472 /**
473  * arch_elf_pt_proc() - check a PT_LOPROC..PT_HIPROC ELF program header
474  * @ehdr:	The main ELF header
475  * @phdr:	The program header to check
476  * @elf:	The open ELF file
477  * @is_interp:	True if the phdr is from the interpreter of the ELF being
478  *		loaded, else false.
479  * @state:	Architecture-specific state preserved throughout the process
480  *		of loading the ELF.
481  *
482  * Inspects the program header phdr to validate its correctness and/or
483  * suitability for the system. Called once per ELF program header in the
484  * range PT_LOPROC to PT_HIPROC, for both the ELF being loaded and its
485  * interpreter.
486  *
487  * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
488  *         with that return code.
489  */
490 static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
491 				   struct elf_phdr *phdr,
492 				   struct file *elf, bool is_interp,
493 				   struct arch_elf_state *state)
494 {
495 	/* Dummy implementation, always proceed */
496 	return 0;
497 }
498 
499 /**
500  * arch_check_elf() - check an ELF executable
501  * @ehdr:	The main ELF header
502  * @has_interp:	True if the ELF has an interpreter, else false.
503  * @interp_ehdr: The interpreter's ELF header
504  * @state:	Architecture-specific state preserved throughout the process
505  *		of loading the ELF.
506  *
507  * Provides a final opportunity for architecture code to reject the loading
508  * of the ELF & cause an exec syscall to return an error. This is called after
509  * all program headers to be checked by arch_elf_pt_proc have been.
510  *
511  * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
512  *         with that return code.
513  */
514 static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
515 				 struct elfhdr *interp_ehdr,
516 				 struct arch_elf_state *state)
517 {
518 	/* Dummy implementation, always proceed */
519 	return 0;
520 }
521 
522 #endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */
523 
524 /* This is much more generalized than the library routine read function,
525    so we keep this separate.  Technically the library read function
526    is only provided so that we can read a.out libraries that have
527    an ELF header */
528 
529 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
530 		struct file *interpreter, unsigned long *interp_map_addr,
531 		unsigned long no_base, struct elf_phdr *interp_elf_phdata)
532 {
533 	struct elf_phdr *eppnt;
534 	unsigned long load_addr = 0;
535 	int load_addr_set = 0;
536 	unsigned long last_bss = 0, elf_bss = 0;
537 	int bss_prot = 0;
538 	unsigned long error = ~0UL;
539 	unsigned long total_size;
540 	int i;
541 
542 	/* First of all, some simple consistency checks */
543 	if (interp_elf_ex->e_type != ET_EXEC &&
544 	    interp_elf_ex->e_type != ET_DYN)
545 		goto out;
546 	if (!elf_check_arch(interp_elf_ex))
547 		goto out;
548 	if (!interpreter->f_op->mmap)
549 		goto out;
550 
551 	total_size = total_mapping_size(interp_elf_phdata,
552 					interp_elf_ex->e_phnum);
553 	if (!total_size) {
554 		error = -EINVAL;
555 		goto out;
556 	}
557 
558 	eppnt = interp_elf_phdata;
559 	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
560 		if (eppnt->p_type == PT_LOAD) {
561 			int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
562 			int elf_prot = 0;
563 			unsigned long vaddr = 0;
564 			unsigned long k, map_addr;
565 
566 			if (eppnt->p_flags & PF_R)
567 				elf_prot = PROT_READ;
568 			if (eppnt->p_flags & PF_W)
569 				elf_prot |= PROT_WRITE;
570 			if (eppnt->p_flags & PF_X)
571 				elf_prot |= PROT_EXEC;
572 			vaddr = eppnt->p_vaddr;
573 			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
574 				elf_type |= MAP_FIXED;
575 			else if (no_base && interp_elf_ex->e_type == ET_DYN)
576 				load_addr = -vaddr;
577 
578 			map_addr = elf_map(interpreter, load_addr + vaddr,
579 					eppnt, elf_prot, elf_type, total_size);
580 			total_size = 0;
581 			if (!*interp_map_addr)
582 				*interp_map_addr = map_addr;
583 			error = map_addr;
584 			if (BAD_ADDR(map_addr))
585 				goto out;
586 
587 			if (!load_addr_set &&
588 			    interp_elf_ex->e_type == ET_DYN) {
589 				load_addr = map_addr - ELF_PAGESTART(vaddr);
590 				load_addr_set = 1;
591 			}
592 
593 			/*
594 			 * Check to see if the section's size will overflow the
595 			 * allowed task size. Note that p_filesz must always be
596 			 * <= p_memsz so it's only necessary to check p_memsz.
597 			 */
598 			k = load_addr + eppnt->p_vaddr;
599 			if (BAD_ADDR(k) ||
600 			    eppnt->p_filesz > eppnt->p_memsz ||
601 			    eppnt->p_memsz > TASK_SIZE ||
602 			    TASK_SIZE - eppnt->p_memsz < k) {
603 				error = -ENOMEM;
604 				goto out;
605 			}
606 
607 			/*
608 			 * Find the end of the file mapping for this phdr, and
609 			 * keep track of the largest address we see for this.
610 			 */
611 			k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
612 			if (k > elf_bss)
613 				elf_bss = k;
614 
615 			/*
616 			 * Do the same thing for the memory mapping - between
617 			 * elf_bss and last_bss is the bss section.
618 			 */
619 			k = load_addr + eppnt->p_vaddr + eppnt->p_memsz;
620 			if (k > last_bss) {
621 				last_bss = k;
622 				bss_prot = elf_prot;
623 			}
624 		}
625 	}
626 
627 	/*
628 	 * Now fill out the bss section: first pad the last page from
629 	 * the file up to the page boundary, and zero it from elf_bss
630 	 * up to the end of the page.
631 	 */
632 	if (padzero(elf_bss)) {
633 		error = -EFAULT;
634 		goto out;
635 	}
636 	/*
637 	 * Next, align both the file and mem bss up to the page size,
638 	 * since this is where elf_bss was just zeroed up to, and where
639 	 * last_bss will end after the vm_brk_flags() below.
640 	 */
641 	elf_bss = ELF_PAGEALIGN(elf_bss);
642 	last_bss = ELF_PAGEALIGN(last_bss);
643 	/* Finally, if there is still more bss to allocate, do it. */
644 	if (last_bss > elf_bss) {
645 		error = vm_brk_flags(elf_bss, last_bss - elf_bss,
646 				bss_prot & PROT_EXEC ? VM_EXEC : 0);
647 		if (error)
648 			goto out;
649 	}
650 
651 	error = load_addr;
652 out:
653 	return error;
654 }
655 
656 /*
657  * These are the functions used to load ELF style executables and shared
658  * libraries.  There is no binary dependent code anywhere else.
659  */
660 
661 #ifndef STACK_RND_MASK
662 #define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))	/* 8MB of VA */
663 #endif
664 
665 static unsigned long randomize_stack_top(unsigned long stack_top)
666 {
667 	unsigned long random_variable = 0;
668 
669 	if ((current->flags & PF_RANDOMIZE) &&
670 		!(current->personality & ADDR_NO_RANDOMIZE)) {
671 		random_variable = get_random_long();
672 		random_variable &= STACK_RND_MASK;
673 		random_variable <<= PAGE_SHIFT;
674 	}
675 #ifdef CONFIG_STACK_GROWSUP
676 	return PAGE_ALIGN(stack_top) + random_variable;
677 #else
678 	return PAGE_ALIGN(stack_top) - random_variable;
679 #endif
680 }
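/*
 * Annotation: with the default STACK_RND_MASK and 4 KiB pages this
 * shifts the stack top by up to 0x7ff pages, i.e. roughly 8 MiB of
 * randomization, and only when PF_RANDOMIZE is set and the
 * ADDR_NO_RANDOMIZE personality bit is clear.
 */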
681 
682 static int load_elf_binary(struct linux_binprm *bprm)
683 {
684 	struct file *interpreter = NULL; /* to shut gcc up */
685  	unsigned long load_addr = 0, load_bias = 0;
686 	int load_addr_set = 0;
687 	char *elf_interpreter = NULL;
688 	unsigned long error;
689 	struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
690 	unsigned long elf_bss, elf_brk;
691 	int bss_prot = 0;
692 	int retval, i;
693 	unsigned long elf_entry;
694 	unsigned long interp_load_addr = 0;
695 	unsigned long start_code, end_code, start_data, end_data;
696 	unsigned long reloc_func_desc __maybe_unused = 0;
697 	int executable_stack = EXSTACK_DEFAULT;
698 	struct pt_regs *regs = current_pt_regs();
699 	struct {
700 		struct elfhdr elf_ex;
701 		struct elfhdr interp_elf_ex;
702 	} *loc;
703 	struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
704 
705 	loc = kmalloc(sizeof(*loc), GFP_KERNEL);
706 	if (!loc) {
707 		retval = -ENOMEM;
708 		goto out_ret;
709 	}
710 
711 	/* Get the exec-header */
712 	loc->elf_ex = *((struct elfhdr *)bprm->buf);
713 
714 	retval = -ENOEXEC;
715 	/* First of all, some simple consistency checks */
716 	if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
717 		goto out;
718 
719 	if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
720 		goto out;
721 	if (!elf_check_arch(&loc->elf_ex))
722 		goto out;
723 	if (!bprm->file->f_op->mmap)
724 		goto out;
725 
726 	elf_phdata = load_elf_phdrs(&loc->elf_ex, bprm->file);
727 	if (!elf_phdata)
728 		goto out;
729 
730 	elf_ppnt = elf_phdata;
731 	elf_bss = 0;
732 	elf_brk = 0;
733 
734 	start_code = ~0UL;
735 	end_code = 0;
736 	start_data = 0;
737 	end_data = 0;
738 
739 	for (i = 0; i < loc->elf_ex.e_phnum; i++) {
740 		if (elf_ppnt->p_type == PT_INTERP) {
741 			/* This is the program interpreter used for
742 			 * shared libraries and dynamically linked
743 			 * executables.
744 			 */
745 			retval = -ENOEXEC;
746 			if (elf_ppnt->p_filesz > PATH_MAX ||
747 			    elf_ppnt->p_filesz < 2)
748 				goto out_free_ph;
749 
750 			retval = -ENOMEM;
751 			elf_interpreter = kmalloc(elf_ppnt->p_filesz,
752 						  GFP_KERNEL);
753 			if (!elf_interpreter)
754 				goto out_free_ph;
755 
756 			retval = kernel_read(bprm->file, elf_ppnt->p_offset,
757 					     elf_interpreter,
758 					     elf_ppnt->p_filesz);
759 			if (retval != elf_ppnt->p_filesz) {
760 				if (retval >= 0)
761 					retval = -EIO;
762 				goto out_free_interp;
763 			}
764 			/* make sure path is NULL terminated */
765 			retval = -ENOEXEC;
766 			if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
767 				goto out_free_interp;
768 
769 			interpreter = open_exec(elf_interpreter);
770 			retval = PTR_ERR(interpreter);
771 			if (IS_ERR(interpreter))
772 				goto out_free_interp;
773 
774 			/*
775 			 * If the binary is not readable then enforce
776 			 * mm->dumpable = 0 regardless of the interpreter's
777 			 * permissions.
778 			 */
779 			would_dump(bprm, interpreter);
780 
781 			/* Get the exec headers */
782 			retval = kernel_read(interpreter, 0,
783 					     (void *)&loc->interp_elf_ex,
784 					     sizeof(loc->interp_elf_ex));
785 			if (retval != sizeof(loc->interp_elf_ex)) {
786 				if (retval >= 0)
787 					retval = -EIO;
788 				goto out_free_dentry;
789 			}
790 
791 			break;
792 		}
793 		elf_ppnt++;
794 	}
795 
796 	elf_ppnt = elf_phdata;
797 	for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
798 		switch (elf_ppnt->p_type) {
799 		case PT_GNU_STACK:
800 			if (elf_ppnt->p_flags & PF_X)
801 				executable_stack = EXSTACK_ENABLE_X;
802 			else
803 				executable_stack = EXSTACK_DISABLE_X;
804 			break;
805 
806 		case PT_LOPROC ... PT_HIPROC:
807 			retval = arch_elf_pt_proc(&loc->elf_ex, elf_ppnt,
808 						  bprm->file, false,
809 						  &arch_state);
810 			if (retval)
811 				goto out_free_dentry;
812 			break;
813 		}
814 
815 	/* Some simple consistency checks for the interpreter */
816 	if (elf_interpreter) {
817 		retval = -ELIBBAD;
818 		/* Not an ELF interpreter */
819 		if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
820 			goto out_free_dentry;
821 		/* Verify the interpreter has a valid arch */
822 		if (!elf_check_arch(&loc->interp_elf_ex))
823 			goto out_free_dentry;
824 
825 		/* Load the interpreter program headers */
826 		interp_elf_phdata = load_elf_phdrs(&loc->interp_elf_ex,
827 						   interpreter);
828 		if (!interp_elf_phdata)
829 			goto out_free_dentry;
830 
831 		/* Pass PT_LOPROC..PT_HIPROC headers to arch code */
832 		elf_ppnt = interp_elf_phdata;
833 		for (i = 0; i < loc->interp_elf_ex.e_phnum; i++, elf_ppnt++)
834 			switch (elf_ppnt->p_type) {
835 			case PT_LOPROC ... PT_HIPROC:
836 				retval = arch_elf_pt_proc(&loc->interp_elf_ex,
837 							  elf_ppnt, interpreter,
838 							  true, &arch_state);
839 				if (retval)
840 					goto out_free_dentry;
841 				break;
842 			}
843 	}
844 
845 	/*
846 	 * Allow arch code to reject the ELF at this point, whilst it's
847 	 * still possible to return an error to the code that invoked
848 	 * the exec syscall.
849 	 */
850 	retval = arch_check_elf(&loc->elf_ex,
851 				!!interpreter, &loc->interp_elf_ex,
852 				&arch_state);
853 	if (retval)
854 		goto out_free_dentry;
855 
856 	/* Flush all traces of the currently running executable */
857 	retval = flush_old_exec(bprm);
858 	if (retval)
859 		goto out_free_dentry;
860 
861 	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
862 	   may depend on the personality.  */
863 	SET_PERSONALITY2(loc->elf_ex, &arch_state);
864 	if (elf_read_implies_exec(loc->elf_ex, executable_stack))
865 		current->personality |= READ_IMPLIES_EXEC;
866 
867 	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
868 		current->flags |= PF_RANDOMIZE;
869 
870 	setup_new_exec(bprm);
871 	install_exec_creds(bprm);
872 
873 	/* Do this so that we can load the interpreter, if need be.  We will
874 	   change some of these later */
875 	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
876 				 executable_stack);
877 	if (retval < 0)
878 		goto out_free_dentry;
879 
880 	current->mm->start_stack = bprm->p;
881 
882 	/* Now we do a little grungy work by mmapping the ELF image into
883 	   the correct location in memory. */
884 	for (i = 0, elf_ppnt = elf_phdata;
885 	    i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
886 		int elf_prot = 0, elf_flags;
887 		unsigned long k, vaddr;
888 		unsigned long total_size = 0;
889 
890 		if (elf_ppnt->p_type != PT_LOAD)
891 			continue;
892 
893 		if (unlikely(elf_brk > elf_bss)) {
894 			unsigned long nbyte;
895 
896 			/* There was a PT_LOAD segment with p_memsz > p_filesz
897 			   before this one. Map anonymous pages, if needed,
898 			   and clear the area.  */
899 			retval = set_brk(elf_bss + load_bias,
900 					 elf_brk + load_bias,
901 					 bss_prot);
902 			if (retval)
903 				goto out_free_dentry;
904 			nbyte = ELF_PAGEOFFSET(elf_bss);
905 			if (nbyte) {
906 				nbyte = ELF_MIN_ALIGN - nbyte;
907 				if (nbyte > elf_brk - elf_bss)
908 					nbyte = elf_brk - elf_bss;
909 				if (clear_user((void __user *)elf_bss +
910 							load_bias, nbyte)) {
911 					/*
912 					 * This bss-zeroing can fail if the ELF
913 					 * file specifies odd protections. So
914 					 * we don't check the return value
915 					 */
916 				}
917 			}
918 		}
919 
920 		if (elf_ppnt->p_flags & PF_R)
921 			elf_prot |= PROT_READ;
922 		if (elf_ppnt->p_flags & PF_W)
923 			elf_prot |= PROT_WRITE;
924 		if (elf_ppnt->p_flags & PF_X)
925 			elf_prot |= PROT_EXEC;
926 
927 		elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
928 
929 		vaddr = elf_ppnt->p_vaddr;
930 		if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
931 			elf_flags |= MAP_FIXED;
932 		} else if (loc->elf_ex.e_type == ET_DYN) {
933 			/* Try and get dynamic programs out of the way of the
934 			 * default mmap base, as well as whatever program they
935 			 * might try to exec.  This is because the brk will
936 			 * follow the loader, and is not movable.  */
937 			load_bias = ELF_ET_DYN_BASE - vaddr;
938 			if (current->flags & PF_RANDOMIZE)
939 				load_bias += arch_mmap_rnd();
940 			load_bias = ELF_PAGESTART(load_bias);
941 			total_size = total_mapping_size(elf_phdata,
942 							loc->elf_ex.e_phnum);
943 			if (!total_size) {
944 				retval = -EINVAL;
945 				goto out_free_dentry;
946 			}
947 		}
948 
949 		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
950 				elf_prot, elf_flags, total_size);
951 		if (BAD_ADDR(error)) {
952 			retval = IS_ERR((void *)error) ?
953 				PTR_ERR((void*)error) : -EINVAL;
954 			goto out_free_dentry;
955 		}
956 
957 		if (!load_addr_set) {
958 			load_addr_set = 1;
959 			load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
960 			if (loc->elf_ex.e_type == ET_DYN) {
961 				load_bias += error -
962 				             ELF_PAGESTART(load_bias + vaddr);
963 				load_addr += load_bias;
964 				reloc_func_desc = load_bias;
965 			}
966 		}
967 		k = elf_ppnt->p_vaddr;
968 		if (k < start_code)
969 			start_code = k;
970 		if (start_data < k)
971 			start_data = k;
972 
973 		/*
974 		 * Check to see if the section's size will overflow the
975 		 * allowed task size. Note that p_filesz must always be
976 		 * <= p_memsz so it is only necessary to check p_memsz.
977 		 */
978 		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
979 		    elf_ppnt->p_memsz > TASK_SIZE ||
980 		    TASK_SIZE - elf_ppnt->p_memsz < k) {
981 			/* set_brk can never work. Avoid overflows. */
982 			retval = -EINVAL;
983 			goto out_free_dentry;
984 		}
985 
986 		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
987 
988 		if (k > elf_bss)
989 			elf_bss = k;
990 		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
991 			end_code = k;
992 		if (end_data < k)
993 			end_data = k;
994 		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
995 		if (k > elf_brk) {
996 			bss_prot = elf_prot;
997 			elf_brk = k;
998 		}
999 	}
1000 
1001 	loc->elf_ex.e_entry += load_bias;
1002 	elf_bss += load_bias;
1003 	elf_brk += load_bias;
1004 	start_code += load_bias;
1005 	end_code += load_bias;
1006 	start_data += load_bias;
1007 	end_data += load_bias;
1008 
1009 	/* Calling set_brk effectively mmaps the pages that we need
1010 	 * for the bss and break sections.  We must do this before
1011 	 * mapping in the interpreter, to make sure it doesn't wind
1012 	 * up getting placed where the bss needs to go.
1013 	 */
1014 	retval = set_brk(elf_bss, elf_brk, bss_prot);
1015 	if (retval)
1016 		goto out_free_dentry;
1017 	if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
1018 		retval = -EFAULT; /* Nobody gets to see this, but.. */
1019 		goto out_free_dentry;
1020 	}
1021 
1022 	if (elf_interpreter) {
1023 		unsigned long interp_map_addr = 0;
1024 
1025 		elf_entry = load_elf_interp(&loc->interp_elf_ex,
1026 					    interpreter,
1027 					    &interp_map_addr,
1028 					    load_bias, interp_elf_phdata);
1029 		if (!IS_ERR((void *)elf_entry)) {
1030 			/*
1031 			 * load_elf_interp() returns relocation
1032 			 * adjustment
1033 			 */
1034 			interp_load_addr = elf_entry;
1035 			elf_entry += loc->interp_elf_ex.e_entry;
1036 		}
1037 		if (BAD_ADDR(elf_entry)) {
1038 			retval = IS_ERR((void *)elf_entry) ?
1039 					(int)elf_entry : -EINVAL;
1040 			goto out_free_dentry;
1041 		}
1042 		reloc_func_desc = interp_load_addr;
1043 
1044 		allow_write_access(interpreter);
1045 		fput(interpreter);
1046 		kfree(elf_interpreter);
1047 	} else {
1048 		elf_entry = loc->elf_ex.e_entry;
1049 		if (BAD_ADDR(elf_entry)) {
1050 			retval = -EINVAL;
1051 			goto out_free_dentry;
1052 		}
1053 	}
1054 
1055 	kfree(interp_elf_phdata);
1056 	kfree(elf_phdata);
1057 
1058 	set_binfmt(&elf_format);
1059 
1060 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
1061 	retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
1062 	if (retval < 0)
1063 		goto out;
1064 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
1065 
1066 	retval = create_elf_tables(bprm, &loc->elf_ex,
1067 			  load_addr, interp_load_addr);
1068 	if (retval < 0)
1069 		goto out;
1070 	/* N.B. passed_fileno might not be initialized? */
1071 	current->mm->end_code = end_code;
1072 	current->mm->start_code = start_code;
1073 	current->mm->start_data = start_data;
1074 	current->mm->end_data = end_data;
1075 	current->mm->start_stack = bprm->p;
1076 
1077 	if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
1078 		current->mm->brk = current->mm->start_brk =
1079 			arch_randomize_brk(current->mm);
1080 #ifdef compat_brk_randomized
1081 		current->brk_randomized = 1;
1082 #endif
1083 	}
1084 
1085 	if (current->personality & MMAP_PAGE_ZERO) {
1086 		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
1087 		   and some applications "depend" upon this behavior.
1088 		   Since we do not have the power to recompile these, we
1089 		   emulate the SVr4 behavior. Sigh. */
1090 		error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
1091 				MAP_FIXED | MAP_PRIVATE, 0);
1092 	}
1093 
1094 #ifdef ELF_PLAT_INIT
1095 	/*
1096 	 * The ABI may specify that certain registers be set up in special
1097 	 * ways (on i386 %edx is the address of a DT_FINI function, for
1098 	 * example).  In addition, it may also specify (eg, PowerPC64 ELF)
1099 	 * that the e_entry field is the address of the function descriptor
1100 	 * for the startup routine, rather than the address of the startup
1101 	 * routine itself.  This macro performs whatever initialization to
1102 	 * the regs structure is required as well as any relocations to the
1103 	 * function descriptor entries when executing dynamically linked apps.
1104 	 */
1105 	ELF_PLAT_INIT(regs, reloc_func_desc);
1106 #endif
1107 
1108 	start_thread(regs, elf_entry, bprm->p);
1109 	retval = 0;
1110 out:
1111 	kfree(loc);
1112 out_ret:
1113 	return retval;
1114 
1115 	/* error cleanup */
1116 out_free_dentry:
1117 	kfree(interp_elf_phdata);
1118 	allow_write_access(interpreter);
1119 	if (interpreter)
1120 		fput(interpreter);
1121 out_free_interp:
1122 	kfree(elf_interpreter);
1123 out_free_ph:
1124 	kfree(elf_phdata);
1125 	goto out;
1126 }
1127 
1128 #ifdef CONFIG_USELIB
1129 /* This is really simpleminded and specialized - we are loading an
1130    a.out library that has an ELF header. */
1131 static int load_elf_library(struct file *file)
1132 {
1133 	struct elf_phdr *elf_phdata;
1134 	struct elf_phdr *eppnt;
1135 	unsigned long elf_bss, bss, len;
1136 	int retval, error, i, j;
1137 	struct elfhdr elf_ex;
1138 
1139 	error = -ENOEXEC;
1140 	retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1141 	if (retval != sizeof(elf_ex))
1142 		goto out;
1143 
1144 	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1145 		goto out;
1146 
1147 	/* First of all, some simple consistency checks */
1148 	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1149 	    !elf_check_arch(&elf_ex) || !file->f_op->mmap)
1150 		goto out;
1151 
1152 	/* Now read in all of the header information */
1153 
1154 	j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1155 	/* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1156 
1157 	error = -ENOMEM;
1158 	elf_phdata = kmalloc(j, GFP_KERNEL);
1159 	if (!elf_phdata)
1160 		goto out;
1161 
1162 	eppnt = elf_phdata;
1163 	error = -ENOEXEC;
1164 	retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1165 	if (retval != j)
1166 		goto out_free_ph;
1167 
1168 	for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
1169 		if ((eppnt + i)->p_type == PT_LOAD)
1170 			j++;
1171 	if (j != 1)
1172 		goto out_free_ph;
1173 
1174 	while (eppnt->p_type != PT_LOAD)
1175 		eppnt++;
1176 
1177 	/* Now use mmap to map the library into memory. */
1178 	error = vm_mmap(file,
1179 			ELF_PAGESTART(eppnt->p_vaddr),
1180 			(eppnt->p_filesz +
1181 			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
1182 			PROT_READ | PROT_WRITE | PROT_EXEC,
1183 			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1184 			(eppnt->p_offset -
1185 			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
1186 	if (error != ELF_PAGESTART(eppnt->p_vaddr))
1187 		goto out_free_ph;
1188 
1189 	elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1190 	if (padzero(elf_bss)) {
1191 		error = -EFAULT;
1192 		goto out_free_ph;
1193 	}
1194 
1195 	len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1196 			    ELF_MIN_ALIGN - 1);
1197 	bss = eppnt->p_memsz + eppnt->p_vaddr;
1198 	if (bss > len) {
1199 		error = vm_brk(len, bss - len);
1200 		if (error)
1201 			goto out_free_ph;
1202 	}
1203 	error = 0;
1204 
1205 out_free_ph:
1206 	kfree(elf_phdata);
1207 out:
1208 	return error;
1209 }
1210 #endif /* #ifdef CONFIG_USELIB */
1211 
1212 #ifdef CONFIG_ELF_CORE
1213 /*
1214  * ELF core dumper
1215  *
1216  * Modelled on fs/exec.c:aout_core_dump()
1217  * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1218  */
1219 
1220 /*
1221  * The purpose of always_dump_vma() is to make sure that special kernel mappings
1222  * that are useful for post-mortem analysis are included in every core dump.
1223  * In that way we ensure that the core dump is fully interpretable later
1224  * without matching up the same kernel and hardware config to see what PC values
1225  * meant. These special mappings include the vDSO, vsyscall, and other
1226  * architecture-specific mappings.
1227  */
1228 static bool always_dump_vma(struct vm_area_struct *vma)
1229 {
1230 	/* Any vsyscall mappings? */
1231 	if (vma == get_gate_vma(vma->vm_mm))
1232 		return true;
1233 
1234 	/*
1235 	 * Assume that all vmas with a .name op should always be dumped.
1236 	 * If this changes, a new vm_ops field can easily be added.
1237 	 */
1238 	if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma))
1239 		return true;
1240 
1241 	/*
1242 	 * arch_vma_name() returns non-NULL for special architecture mappings,
1243 	 * such as vDSO sections.
1244 	 */
1245 	if (arch_vma_name(vma))
1246 		return true;
1247 
1248 	return false;
1249 }
1250 
1251 /*
1252  * Decide what to dump of a segment, part, all or none.
1253  */
1254 static unsigned long vma_dump_size(struct vm_area_struct *vma,
1255 				   unsigned long mm_flags)
1256 {
1257 #define FILTER(type)	(mm_flags & (1UL << MMF_DUMP_##type))
1258 
1259 	/* always dump the vdso and vsyscall sections */
1260 	if (always_dump_vma(vma))
1261 		goto whole;
1262 
1263 	if (vma->vm_flags & VM_DONTDUMP)
1264 		return 0;
1265 
1266 	/* support for DAX */
1267 	if (vma_is_dax(vma)) {
1268 		if ((vma->vm_flags & VM_SHARED) && FILTER(DAX_SHARED))
1269 			goto whole;
1270 		if (!(vma->vm_flags & VM_SHARED) && FILTER(DAX_PRIVATE))
1271 			goto whole;
1272 		return 0;
1273 	}
1274 
1275 	/* Hugetlb memory check */
1276 	if (vma->vm_flags & VM_HUGETLB) {
1277 		if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1278 			goto whole;
1279 		if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1280 			goto whole;
1281 		return 0;
1282 	}
1283 
1284 	/* Do not dump I/O mapped devices or special mappings */
1285 	if (vma->vm_flags & VM_IO)
1286 		return 0;
1287 
1288 	/* By default, dump shared memory if mapped from an anonymous file. */
1289 	if (vma->vm_flags & VM_SHARED) {
1290 		if (file_inode(vma->vm_file)->i_nlink == 0 ?
1291 		    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1292 			goto whole;
1293 		return 0;
1294 	}
1295 
1296 	/* Dump segments that have been written to.  */
1297 	if (vma->anon_vma && FILTER(ANON_PRIVATE))
1298 		goto whole;
1299 	if (vma->vm_file == NULL)
1300 		return 0;
1301 
1302 	if (FILTER(MAPPED_PRIVATE))
1303 		goto whole;
1304 
1305 	/*
1306 	 * If this looks like the beginning of a DSO or executable mapping,
1307 	 * check for an ELF header.  If we find one, dump the first page to
1308 	 * aid in determining what was mapped here.
1309 	 */
1310 	if (FILTER(ELF_HEADERS) &&
1311 	    vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
1312 		u32 __user *header = (u32 __user *) vma->vm_start;
1313 		u32 word;
1314 		mm_segment_t fs = get_fs();
1315 		/*
1316 		 * Doing it this way gets the constant folded by GCC.
1317 		 */
1318 		union {
1319 			u32 cmp;
1320 			char elfmag[SELFMAG];
1321 		} magic;
1322 		BUILD_BUG_ON(SELFMAG != sizeof word);
1323 		magic.elfmag[EI_MAG0] = ELFMAG0;
1324 		magic.elfmag[EI_MAG1] = ELFMAG1;
1325 		magic.elfmag[EI_MAG2] = ELFMAG2;
1326 		magic.elfmag[EI_MAG3] = ELFMAG3;
1327 		/*
1328 		 * Switch to the user "segment" for get_user(),
1329 		 * then put back what elf_core_dump() had in place.
1330 		 */
1331 		set_fs(USER_DS);
1332 		if (unlikely(get_user(word, header)))
1333 			word = 0;
1334 		set_fs(fs);
1335 		if (word == magic.cmp)
1336 			return PAGE_SIZE;
1337 	}
1338 
1339 #undef	FILTER
1340 
1341 	return 0;
1342 
1343 whole:
1344 	return vma->vm_end - vma->vm_start;
1345 }
1346 
1347 /* An ELF note in memory */
1348 struct memelfnote
1349 {
1350 	const char *name;
1351 	int type;
1352 	unsigned int datasz;
1353 	void *data;
1354 };
1355 
1356 static int notesize(struct memelfnote *en)
1357 {
1358 	int sz;
1359 
1360 	sz = sizeof(struct elf_note);
1361 	sz += roundup(strlen(en->name) + 1, 4);
1362 	sz += roundup(en->datasz, 4);
1363 
1364 	return sz;
1365 }
1366 
1367 static int writenote(struct memelfnote *men, struct coredump_params *cprm)
1368 {
1369 	struct elf_note en;
1370 	en.n_namesz = strlen(men->name) + 1;
1371 	en.n_descsz = men->datasz;
1372 	en.n_type = men->type;
1373 
1374 	return dump_emit(cprm, &en, sizeof(en)) &&
1375 	    dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
1376 	    dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
1377 }
1378 
1379 static void fill_elf_header(struct elfhdr *elf, int segs,
1380 			    u16 machine, u32 flags)
1381 {
1382 	memset(elf, 0, sizeof(*elf));
1383 
1384 	memcpy(elf->e_ident, ELFMAG, SELFMAG);
1385 	elf->e_ident[EI_CLASS] = ELF_CLASS;
1386 	elf->e_ident[EI_DATA] = ELF_DATA;
1387 	elf->e_ident[EI_VERSION] = EV_CURRENT;
1388 	elf->e_ident[EI_OSABI] = ELF_OSABI;
1389 
1390 	elf->e_type = ET_CORE;
1391 	elf->e_machine = machine;
1392 	elf->e_version = EV_CURRENT;
1393 	elf->e_phoff = sizeof(struct elfhdr);
1394 	elf->e_flags = flags;
1395 	elf->e_ehsize = sizeof(struct elfhdr);
1396 	elf->e_phentsize = sizeof(struct elf_phdr);
1397 	elf->e_phnum = segs;
1398 
1399 	return;
1400 }
1401 
1402 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1403 {
1404 	phdr->p_type = PT_NOTE;
1405 	phdr->p_offset = offset;
1406 	phdr->p_vaddr = 0;
1407 	phdr->p_paddr = 0;
1408 	phdr->p_filesz = sz;
1409 	phdr->p_memsz = 0;
1410 	phdr->p_flags = 0;
1411 	phdr->p_align = 0;
1412 	return;
1413 }
1414 
1415 static void fill_note(struct memelfnote *note, const char *name, int type,
1416 		unsigned int sz, void *data)
1417 {
1418 	note->name = name;
1419 	note->type = type;
1420 	note->datasz = sz;
1421 	note->data = data;
1422 	return;
1423 }
1424 
1425 /*
1426  * fill up all the fields in prstatus from the given task struct, except
1427  * registers which need to be filled up separately.
1428  */
1429 static void fill_prstatus(struct elf_prstatus *prstatus,
1430 		struct task_struct *p, long signr)
1431 {
1432 	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1433 	prstatus->pr_sigpend = p->pending.signal.sig[0];
1434 	prstatus->pr_sighold = p->blocked.sig[0];
1435 	rcu_read_lock();
1436 	prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1437 	rcu_read_unlock();
1438 	prstatus->pr_pid = task_pid_vnr(p);
1439 	prstatus->pr_pgrp = task_pgrp_vnr(p);
1440 	prstatus->pr_sid = task_session_vnr(p);
1441 	if (thread_group_leader(p)) {
1442 		struct task_cputime cputime;
1443 
1444 		/*
1445 		 * This is the record for the group leader.  It shows the
1446 		 * group-wide total, not its individual thread total.
1447 		 */
1448 		thread_group_cputime(p, &cputime);
1449 		prstatus->pr_utime = ns_to_timeval(cputime.utime);
1450 		prstatus->pr_stime = ns_to_timeval(cputime.stime);
1451 	} else {
1452 		u64 utime, stime;
1453 
1454 		task_cputime(p, &utime, &stime);
1455 		prstatus->pr_utime = ns_to_timeval(utime);
1456 		prstatus->pr_stime = ns_to_timeval(stime);
1457 	}
1458 
1459 	prstatus->pr_cutime = ns_to_timeval(p->signal->cutime);
1460 	prstatus->pr_cstime = ns_to_timeval(p->signal->cstime);
1461 }
1462 
1463 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1464 		       struct mm_struct *mm)
1465 {
1466 	const struct cred *cred;
1467 	unsigned int i, len;
1468 
1469 	/* first copy the parameters from user space */
1470 	memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1471 
1472 	len = mm->arg_end - mm->arg_start;
1473 	if (len >= ELF_PRARGSZ)
1474 		len = ELF_PRARGSZ-1;
1475 	if (copy_from_user(&psinfo->pr_psargs,
1476 		           (const char __user *)mm->arg_start, len))
1477 		return -EFAULT;
1478 	for(i = 0; i < len; i++)
1479 		if (psinfo->pr_psargs[i] == 0)
1480 			psinfo->pr_psargs[i] = ' ';
1481 	psinfo->pr_psargs[len] = 0;
1482 
1483 	rcu_read_lock();
1484 	psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1485 	rcu_read_unlock();
1486 	psinfo->pr_pid = task_pid_vnr(p);
1487 	psinfo->pr_pgrp = task_pgrp_vnr(p);
1488 	psinfo->pr_sid = task_session_vnr(p);
1489 
1490 	i = p->state ? ffz(~p->state) + 1 : 0;
1491 	psinfo->pr_state = i;
1492 	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1493 	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1494 	psinfo->pr_nice = task_nice(p);
1495 	psinfo->pr_flag = p->flags;
1496 	rcu_read_lock();
1497 	cred = __task_cred(p);
1498 	SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
1499 	SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
1500 	rcu_read_unlock();
1501 	strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1502 
1503 	return 0;
1504 }
1505 
1506 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1507 {
1508 	elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1509 	int i = 0;
1510 	do
1511 		i += 2;
1512 	while (auxv[i - 2] != AT_NULL);
1513 	fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1514 }
1515 
1516 static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1517 		const siginfo_t *siginfo)
1518 {
1519 	mm_segment_t old_fs = get_fs();
1520 	set_fs(KERNEL_DS);
1521 	copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
1522 	set_fs(old_fs);
1523 	fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
1524 }
1525 
1526 #define MAX_FILE_NOTE_SIZE (4*1024*1024)
1527 /*
1528  * Format of NT_FILE note:
1529  *
1530  * long count     -- how many files are mapped
1531  * long page_size -- units for file_ofs
1532  * array of [COUNT] elements of
1533  *   long start
1534  *   long end
1535  *   long file_ofs
1536  * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
1537  */
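/*
 * Illustrative layout (annotation): a note describing two mappings of
 * a hypothetical /lib/ld.so would contain
 *
 *   count = 2, page_size = PAGE_SIZE,
 *   [start0, end0, file_ofs0], [start1, end1, file_ofs1],
 *   "/lib/ld.so\0/lib/ld.so\0"
 *
 * where file_ofs is the mapping's vm_pgoff, i.e. its file offset in
 * page_size units rather than bytes.
 */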
1538 static int fill_files_note(struct memelfnote *note)
1539 {
1540 	struct vm_area_struct *vma;
1541 	unsigned count, size, names_ofs, remaining, n;
1542 	user_long_t *data;
1543 	user_long_t *start_end_ofs;
1544 	char *name_base, *name_curpos;
1545 
1546 	/* *Estimated* file count and total data size needed */
1547 	count = current->mm->map_count;
1548 	size = count * 64;
1549 
1550 	names_ofs = (2 + 3 * count) * sizeof(data[0]);
1551  alloc:
1552 	if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1553 		return -EINVAL;
1554 	size = round_up(size, PAGE_SIZE);
1555 	data = vmalloc(size);
1556 	if (!data)
1557 		return -ENOMEM;
1558 
1559 	start_end_ofs = data + 2;
1560 	name_base = name_curpos = ((char *)data) + names_ofs;
1561 	remaining = size - names_ofs;
1562 	count = 0;
1563 	for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1564 		struct file *file;
1565 		const char *filename;
1566 
1567 		file = vma->vm_file;
1568 		if (!file)
1569 			continue;
1570 		filename = file_path(file, name_curpos, remaining);
1571 		if (IS_ERR(filename)) {
1572 			if (PTR_ERR(filename) == -ENAMETOOLONG) {
1573 				vfree(data);
1574 				size = size * 5 / 4;
1575 				goto alloc;
1576 			}
1577 			continue;
1578 		}
1579 
1580 		/* file_path() fills at the end, move name down */
1581 		/* n = strlen(filename) + 1: */
1582 		n = (name_curpos + remaining) - filename;
1583 		remaining = filename - name_curpos;
1584 		memmove(name_curpos, filename, n);
1585 		name_curpos += n;
1586 
1587 		*start_end_ofs++ = vma->vm_start;
1588 		*start_end_ofs++ = vma->vm_end;
1589 		*start_end_ofs++ = vma->vm_pgoff;
1590 		count++;
1591 	}
1592 
1593 	/* Now we know exact count of files, can store it */
1594 	data[0] = count;
1595 	data[1] = PAGE_SIZE;
1596 	/*
1597 	 * The count is usually less than current->mm->map_count,
1598 	 * so we need to move the filenames down.
1599 	 */
1600 	n = current->mm->map_count - count;
1601 	if (n != 0) {
1602 		unsigned shift_bytes = n * 3 * sizeof(data[0]);
1603 		memmove(name_base - shift_bytes, name_base,
1604 			name_curpos - name_base);
1605 		name_curpos -= shift_bytes;
1606 	}
1607 
1608 	size = name_curpos - (char *)data;
1609 	fill_note(note, "CORE", NT_FILE, size, data);
1610 	return 0;
1611 }
1612 
1613 #ifdef CORE_DUMP_USE_REGSET
1614 #include <linux/regset.h>
1615 
1616 struct elf_thread_core_info {
1617 	struct elf_thread_core_info *next;
1618 	struct task_struct *task;
1619 	struct elf_prstatus prstatus;
1620 	struct memelfnote notes[0];
1621 };
1622 
1623 struct elf_note_info {
1624 	struct elf_thread_core_info *thread;
1625 	struct memelfnote psinfo;
1626 	struct memelfnote signote;
1627 	struct memelfnote auxv;
1628 	struct memelfnote files;
1629 	user_siginfo_t csigdata;
1630 	size_t size;
1631 	int thread_notes;
1632 };
1633 
1634 /*
1635  * When a regset has a writeback hook, we call it on each thread before
1636  * dumping user memory.  On register window machines, this makes sure the
1637  * user memory backing the register data is up to date before we read it.
1638  */
1639 static void do_thread_regset_writeback(struct task_struct *task,
1640 				       const struct user_regset *regset)
1641 {
1642 	if (regset->writeback)
1643 		regset->writeback(task, regset, 1);
1644 }
1645 
1646 #ifndef PRSTATUS_SIZE
1647 #define PRSTATUS_SIZE(S, R) sizeof(S)
1648 #endif
1649 
1650 #ifndef SET_PR_FPVALID
1651 #define SET_PR_FPVALID(S, V, R) ((S)->pr_fpvalid = (V))
1652 #endif
1653 
1654 static int fill_thread_core_info(struct elf_thread_core_info *t,
1655 				 const struct user_regset_view *view,
1656 				 long signr, size_t *total)
1657 {
1658 	unsigned int i;
1659 	unsigned int regset_size = view->regsets[0].n * view->regsets[0].size;
1660 
1661 	/*
1662 	 * NT_PRSTATUS is the one special case, because the regset data
1663 	 * goes into the pr_reg field inside the note contents, rather
1664 	 * than being the whole note contents.  We fill the rest in here.
1665 	 * We assume that regset 0 is NT_PRSTATUS.
1666 	 */
1667 	fill_prstatus(&t->prstatus, t->task, signr);
1668 	(void) view->regsets[0].get(t->task, &view->regsets[0], 0, regset_size,
1669 				    &t->prstatus.pr_reg, NULL);
1670 
1671 	fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1672 		  PRSTATUS_SIZE(t->prstatus, regset_size), &t->prstatus);
1673 	*total += notesize(&t->notes[0]);
1674 
1675 	do_thread_regset_writeback(t->task, &view->regsets[0]);
1676 
1677 	/*
1678 	 * Each other regset might generate a note too.  For each regset
1679 	 * that has no core_note_type or is inactive, we leave t->notes[i]
1680 	 * all zero and we'll know to skip writing it later.
1681 	 */
1682 	for (i = 1; i < view->n; ++i) {
1683 		const struct user_regset *regset = &view->regsets[i];
1684 		do_thread_regset_writeback(t->task, regset);
1685 		if (regset->core_note_type && regset->get &&
1686 		    (!regset->active || regset->active(t->task, regset))) {
1687 			int ret;
1688 			size_t size = regset->n * regset->size;
1689 			void *data = kmalloc(size, GFP_KERNEL);
1690 			if (unlikely(!data))
1691 				return 0;
1692 			ret = regset->get(t->task, regset,
1693 					  0, size, data, NULL);
1694 			if (unlikely(ret))
1695 				kfree(data);
1696 			else {
1697 				if (regset->core_note_type != NT_PRFPREG)
1698 					fill_note(&t->notes[i], "LINUX",
1699 						  regset->core_note_type,
1700 						  size, data);
1701 				else {
1702 					SET_PR_FPVALID(&t->prstatus,
1703 							1, regset_size);
1704 					fill_note(&t->notes[i], "CORE",
1705 						  NT_PRFPREG, size, data);
1706 				}
1707 				*total += notesize(&t->notes[i]);
1708 			}
1709 		}
1710 	}
1711 
1712 	return 1;
1713 }
1714 
1715 static int fill_note_info(struct elfhdr *elf, int phdrs,
1716 			  struct elf_note_info *info,
1717 			  const siginfo_t *siginfo, struct pt_regs *regs)
1718 {
1719 	struct task_struct *dump_task = current;
1720 	const struct user_regset_view *view = task_user_regset_view(dump_task);
1721 	struct elf_thread_core_info *t;
1722 	struct elf_prpsinfo *psinfo;
1723 	struct core_thread *ct;
1724 	unsigned int i;
1725 
1726 	info->size = 0;
1727 	info->thread = NULL;
1728 
1729 	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1730 	if (psinfo == NULL) {
1731 		info->psinfo.data = NULL; /* So we don't free this wrongly */
1732 		return 0;
1733 	}
1734 
1735 	fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1736 
1737 	/*
1738 	 * Figure out how many notes we're going to need for each thread.
1739 	 */
1740 	info->thread_notes = 0;
1741 	for (i = 0; i < view->n; ++i)
1742 		if (view->regsets[i].core_note_type != 0)
1743 			++info->thread_notes;
1744 
1745 	/*
1746 	 * Sanity check.  We rely on regset 0 being NT_PRSTATUS,
1747 	 * since it is our one special case.
1748 	 */
1749 	if (unlikely(info->thread_notes == 0) ||
1750 	    unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1751 		WARN_ON(1);
1752 		return 0;
1753 	}
1754 
1755 	/*
1756 	 * Initialize the ELF file header.
1757 	 */
1758 	fill_elf_header(elf, phdrs,
1759 			view->e_machine, view->e_flags);
1760 
1761 	/*
1762 	 * Allocate a structure for each thread.
1763 	 */
1764 	for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1765 		t = kzalloc(offsetof(struct elf_thread_core_info,
1766 				     notes[info->thread_notes]),
1767 			    GFP_KERNEL);
1768 		if (unlikely(!t))
1769 			return 0;
1770 
1771 		t->task = ct->task;
1772 		if (ct->task == dump_task || !info->thread) {
1773 			t->next = info->thread;
1774 			info->thread = t;
1775 		} else {
1776 			/*
1777 			 * Make sure to keep the original task at
1778 			 * the head of the list.
1779 			 */
1780 			t->next = info->thread->next;
1781 			info->thread->next = t;
1782 		}
1783 	}
1784 
1785 	/*
1786 	 * Now fill in each thread's information.
1787 	 */
1788 	for (t = info->thread; t != NULL; t = t->next)
1789 		if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1790 			return 0;
1791 
1792 	/*
1793 	 * Fill in the two process-wide notes.
1794 	 */
1795 	fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1796 	info->size += notesize(&info->psinfo);
1797 
1798 	fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1799 	info->size += notesize(&info->signote);
1800 
1801 	fill_auxv_note(&info->auxv, current->mm);
1802 	info->size += notesize(&info->auxv);
1803 
1804 	if (fill_files_note(&info->files) == 0)
1805 		info->size += notesize(&info->files);
1806 
1807 	return 1;
1808 }
1809 
1810 static size_t get_note_info_size(struct elf_note_info *info)
1811 {
1812 	return info->size;
1813 }
1814 
1815 /*
1816  * Write all the notes for each thread.  When writing the first thread, the
1817  * process-wide notes are interleaved after the first thread-specific note.
1818  */
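/*
 * Resulting note order, as a sketch: assuming the dumping thread is T0 and
 * there is one other thread T1, the notes come out as
 *
 *	T0 NT_PRSTATUS, NT_PRPSINFO, NT_SIGINFO, NT_AUXV, [NT_FILE],
 *	T0's remaining regset notes, T1 NT_PRSTATUS, T1's remaining
 *	regset notes.
 */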
1819 static int write_note_info(struct elf_note_info *info,
1820 			   struct coredump_params *cprm)
1821 {
1822 	bool first = true;
1823 	struct elf_thread_core_info *t = info->thread;
1824 
1825 	do {
1826 		int i;
1827 
1828 		if (!writenote(&t->notes[0], cprm))
1829 			return 0;
1830 
1831 		if (first && !writenote(&info->psinfo, cprm))
1832 			return 0;
1833 		if (first && !writenote(&info->signote, cprm))
1834 			return 0;
1835 		if (first && !writenote(&info->auxv, cprm))
1836 			return 0;
1837 		if (first && info->files.data &&
1838 				!writenote(&info->files, cprm))
1839 			return 0;
1840 
1841 		for (i = 1; i < info->thread_notes; ++i)
1842 			if (t->notes[i].data &&
1843 			    !writenote(&t->notes[i], cprm))
1844 				return 0;
1845 
1846 		first = false;
1847 		t = t->next;
1848 	} while (t);
1849 
1850 	return 1;
1851 }
1852 
1853 static void free_note_info(struct elf_note_info *info)
1854 {
1855 	struct elf_thread_core_info *threads = info->thread;
1856 	while (threads) {
1857 		unsigned int i;
1858 		struct elf_thread_core_info *t = threads;
1859 		threads = t->next;
1860 		WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1861 		for (i = 1; i < info->thread_notes; ++i)
1862 			kfree(t->notes[i].data);
1863 		kfree(t);
1864 	}
1865 	kfree(info->psinfo.data);
1866 	vfree(info->files.data);
1867 }
1868 
1869 #else
1870 
1871 /* Here is the structure in which status of each thread is captured. */
1872 struct elf_thread_status
1873 {
1874 	struct list_head list;
1875 	struct elf_prstatus prstatus;	/* NT_PRSTATUS */
1876 	elf_fpregset_t fpu;		/* NT_PRFPREG */
1877 	struct task_struct *thread;
1878 #ifdef ELF_CORE_COPY_XFPREGS
1879 	elf_fpxregset_t xfpu;		/* ELF_CORE_XFPREG_TYPE */
1880 #endif
1881 	struct memelfnote notes[3];
1882 	int num_notes;
1883 };
1884 
1885 /*
1886  * In order to add the thread-specific information to the ELF core file,
1887  * we need to keep a linked list of every thread's pr_status and then create
1888  * a single section for them in the final core file.
1889  */
1890 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1891 {
1892 	int sz = 0;
1893 	struct task_struct *p = t->thread;
1894 	t->num_notes = 0;
1895 
1896 	fill_prstatus(&t->prstatus, p, signr);
1897 	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1898 
1899 	fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1900 		  &(t->prstatus));
1901 	t->num_notes++;
1902 	sz += notesize(&t->notes[0]);
1903 
1904 	if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1905 								&t->fpu))) {
1906 		fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1907 			  &(t->fpu));
1908 		t->num_notes++;
1909 		sz += notesize(&t->notes[1]);
1910 	}
1911 
1912 #ifdef ELF_CORE_COPY_XFPREGS
1913 	if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1914 		fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1915 			  sizeof(t->xfpu), &t->xfpu);
1916 		t->num_notes++;
1917 		sz += notesize(&t->notes[2]);
1918 	}
1919 #endif
1920 	return sz;
1921 }
1922 
1923 struct elf_note_info {
1924 	struct memelfnote *notes;
1925 	struct memelfnote *notes_files;
1926 	struct elf_prstatus *prstatus;	/* NT_PRSTATUS */
1927 	struct elf_prpsinfo *psinfo;	/* NT_PRPSINFO */
1928 	struct list_head thread_list;
1929 	elf_fpregset_t *fpu;
1930 #ifdef ELF_CORE_COPY_XFPREGS
1931 	elf_fpxregset_t *xfpu;
1932 #endif
1933 	user_siginfo_t csigdata;
1934 	int thread_status_size;
1935 	int numnote;
1936 };
1937 
1938 static int elf_note_info_init(struct elf_note_info *info)
1939 {
1940 	memset(info, 0, sizeof(*info));
1941 	INIT_LIST_HEAD(&info->thread_list);
1942 
1943 	/* Allocate space for ELF notes */
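	/*
	 * Eight slots cover everything fill_note_info() below may fill in:
	 * PRSTATUS, PRPSINFO, SIGINFO and AUXV, plus the optional NT_FILE,
	 * NT_PRFPREG and (with ELF_CORE_COPY_XFPREGS) xfpu notes.
	 */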
1944 	info->notes = kmalloc(8 * sizeof(struct memelfnote), GFP_KERNEL);
1945 	if (!info->notes)
1946 		return 0;
1947 	info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1948 	if (!info->psinfo)
1949 		return 0;
1950 	info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1951 	if (!info->prstatus)
1952 		return 0;
1953 	info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1954 	if (!info->fpu)
1955 		return 0;
1956 #ifdef ELF_CORE_COPY_XFPREGS
1957 	info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1958 	if (!info->xfpu)
1959 		return 0;
1960 #endif
1961 	return 1;
1962 }
1963 
1964 static int fill_note_info(struct elfhdr *elf, int phdrs,
1965 			  struct elf_note_info *info,
1966 			  const siginfo_t *siginfo, struct pt_regs *regs)
1967 {
1968 	struct list_head *t;
1969 	struct core_thread *ct;
1970 	struct elf_thread_status *ets;
1971 
1972 	if (!elf_note_info_init(info))
1973 		return 0;
1974 
1975 	for (ct = current->mm->core_state->dumper.next;
1976 					ct; ct = ct->next) {
1977 		ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1978 		if (!ets)
1979 			return 0;
1980 
1981 		ets->thread = ct->task;
1982 		list_add(&ets->list, &info->thread_list);
1983 	}
1984 
1985 	list_for_each(t, &info->thread_list) {
1986 		int sz;
1987 
1988 		ets = list_entry(t, struct elf_thread_status, list);
1989 		sz = elf_dump_thread_status(siginfo->si_signo, ets);
1990 		info->thread_status_size += sz;
1991 	}
1992 	/* now collect the dump for the current task */
1993 	memset(info->prstatus, 0, sizeof(*info->prstatus));
1994 	fill_prstatus(info->prstatus, current, siginfo->si_signo);
1995 	elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1996 
1997 	/* Set up header */
1998 	fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
1999 
2000 	/*
2001 	 * Set up the notes in similar form to SVR4 core dumps made
2002 	 * with info from their /proc.
2003 	 */
2004 
2005 	fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
2006 		  sizeof(*info->prstatus), info->prstatus);
2007 	fill_psinfo(info->psinfo, current->group_leader, current->mm);
2008 	fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
2009 		  sizeof(*info->psinfo), info->psinfo);
2010 
2011 	fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
2012 	fill_auxv_note(info->notes + 3, current->mm);
2013 	info->numnote = 4;
2014 
2015 	if (fill_files_note(info->notes + info->numnote) == 0) {
2016 		info->notes_files = info->notes + info->numnote;
2017 		info->numnote++;
2018 	}
2019 
2020 	/* Try to dump the FPU. */
2021 	info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
2022 							       info->fpu);
2023 	if (info->prstatus->pr_fpvalid)
2024 		fill_note(info->notes + info->numnote++,
2025 			  "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
2026 #ifdef ELF_CORE_COPY_XFPREGS
2027 	if (elf_core_copy_task_xfpregs(current, info->xfpu))
2028 		fill_note(info->notes + info->numnote++,
2029 			  "LINUX", ELF_CORE_XFPREG_TYPE,
2030 			  sizeof(*info->xfpu), info->xfpu);
2031 #endif
2032 
2033 	return 1;
2034 }
2035 
2036 static size_t get_note_info_size(struct elf_note_info *info)
2037 {
2038 	int sz = 0;
2039 	int i;
2040 
2041 	for (i = 0; i < info->numnote; i++)
2042 		sz += notesize(info->notes + i);
2043 
2044 	sz += info->thread_status_size;
2045 
2046 	return sz;
2047 }
2048 
2049 static int write_note_info(struct elf_note_info *info,
2050 			   struct coredump_params *cprm)
2051 {
2052 	int i;
2053 	struct list_head *t;
2054 
2055 	for (i = 0; i < info->numnote; i++)
2056 		if (!writenote(info->notes + i, cprm))
2057 			return 0;
2058 
2059 	/* write out the thread status notes section */
2060 	list_for_each(t, &info->thread_list) {
2061 		struct elf_thread_status *tmp =
2062 				list_entry(t, struct elf_thread_status, list);
2063 
2064 		for (i = 0; i < tmp->num_notes; i++)
2065 			if (!writenote(&tmp->notes[i], cprm))
2066 				return 0;
2067 	}
2068 
2069 	return 1;
2070 }
2071 
2072 static void free_note_info(struct elf_note_info *info)
2073 {
2074 	while (!list_empty(&info->thread_list)) {
2075 		struct list_head *tmp = info->thread_list.next;
2076 		list_del(tmp);
2077 		kfree(list_entry(tmp, struct elf_thread_status, list));
2078 	}
2079 
2080 	/* Free data possibly allocated by fill_files_note(): */
2081 	if (info->notes_files)
2082 		vfree(info->notes_files->data);
2083 
2084 	kfree(info->prstatus);
2085 	kfree(info->psinfo);
2086 	kfree(info->notes);
2087 	kfree(info->fpu);
2088 #ifdef ELF_CORE_COPY_XFPREGS
2089 	kfree(info->xfpu);
2090 #endif
2091 }
2092 
2093 #endif
2094 
2095 static struct vm_area_struct *first_vma(struct task_struct *tsk,
2096 					struct vm_area_struct *gate_vma)
2097 {
2098 	struct vm_area_struct *ret = tsk->mm->mmap;
2099 
2100 	if (ret)
2101 		return ret;
2102 	return gate_vma;
2103 }
2104 /*
2105  * Helper function for iterating across a vma list.  It ensures that the caller
2106  * will visit `gate_vma' prior to terminating the search.
2107  */
2108 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
2109 					struct vm_area_struct *gate_vma)
2110 {
2111 	struct vm_area_struct *ret;
2112 
2113 	ret = this_vma->vm_next;
2114 	if (ret)
2115 		return ret;
2116 	if (this_vma == gate_vma)
2117 		return NULL;
2118 	return gate_vma;
2119 }
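
/*
 * Typical use of the two helpers above, as in elf_core_dump() below:
 *
 *	for (vma = first_vma(current, gate_vma); vma != NULL;
 *			vma = next_vma(vma, gate_vma))
 *		...
 */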
2120 
2121 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
2122 			     elf_addr_t e_shoff, int segs)
2123 {
2124 	elf->e_shoff = e_shoff;
2125 	elf->e_shentsize = sizeof(*shdr4extnum);
2126 	elf->e_shnum = 1;
2127 	elf->e_shstrndx = SHN_UNDEF;
2128 
2129 	memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2130 
2131 	shdr4extnum->sh_type = SHT_NULL;
2132 	shdr4extnum->sh_size = elf->e_shnum;
2133 	shdr4extnum->sh_link = elf->e_shstrndx;
2134 	shdr4extnum->sh_info = segs;
2135 }
2136 
2137 /*
2138  * Actual dumper
2139  *
2140  * This is a two-pass process; first we find the offsets of the bits,
2141  * and then they are actually written out.  If we run out of core limit
2142  * we just truncate.
2143  */
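/*
 * Rough layout of the resulting file, in the order it is emitted below
 * (architecture extras via elf_core_extra_*() omitted from this sketch):
 *
 *	ELF header
 *	program headers: one PT_NOTE, then one PT_LOAD per dumped vma
 *	note data
 *	padding up to the next ELF_EXEC_PAGESIZE boundary
 *	vma contents, one vma after another, page by page
 *	spare section header, only when e_phnum == PN_XNUM
 */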
2144 static int elf_core_dump(struct coredump_params *cprm)
2145 {
2146 	int has_dumped = 0;
2147 	mm_segment_t fs;
2148 	int segs, i;
2149 	size_t vma_data_size = 0;
2150 	struct vm_area_struct *vma, *gate_vma;
2151 	struct elfhdr *elf = NULL;
2152 	loff_t offset = 0, dataoff;
2153 	struct elf_note_info info = { };
2154 	struct elf_phdr *phdr4note = NULL;
2155 	struct elf_shdr *shdr4extnum = NULL;
2156 	Elf_Half e_phnum;
2157 	elf_addr_t e_shoff;
2158 	elf_addr_t *vma_filesz = NULL;
2159 
2160 	/*
2161 	 * We no longer stop all VM operations.
2162 	 *
2163 	 * This is because those proceses that could possibly change map_count
2164 	 * This is because those processes that could possibly change map_count
2165 	 * finishing this core dump.
2166 	 *
2167 	 * Only ptrace can touch these memory addresses, but it doesn't change
2168 	 * the map_count or the pages allocated. So no possibility of crashing
2169 	 * exists while dumping the mm->vm_next areas to the core file.
2170 	 */
2171 
2172 	/* alloc memory for large data structures: too large to be on stack */
2173 	elf = kmalloc(sizeof(*elf), GFP_KERNEL);
2174 	if (!elf)
2175 		goto out;
2176 	/*
2177 	 * The number of segs is recorded in the ELF header as a 16-bit value.
2178 	 * Please check the DEFAULT_MAX_MAP_COUNT definition when modifying this.
2179 	 */
2180 	segs = current->mm->map_count;
2181 	segs += elf_core_extra_phdrs();
2182 
2183 	gate_vma = get_gate_vma(current->mm);
2184 	if (gate_vma != NULL)
2185 		segs++;
2186 
2187 	/* for notes section */
2188 	segs++;
2189 
2190 	/* If segs > PN_XNUM (0xffff), then e_phnum overflows. To avoid
2191 	 * this, the kernel supports extended numbering. Have a look at
2192 	 * include/linux/elf.h for further information. */
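	/*
	 * In that case the real count lands in the sh_info field of the one
	 * spare SHT_NULL section header; see fill_extnum_info() above.
	 */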
2193 	e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2194 
2195 	/*
2196 	 * Collect all the non-memory information about the process for the
2197 	 * notes.  This also sets up the file header.
2198 	 */
2199 	if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2200 		goto cleanup;
2201 
2202 	has_dumped = 1;
2203 
2204 	fs = get_fs();
2205 	set_fs(KERNEL_DS);
2206 
2207 	offset += sizeof(*elf);				/* Elf header */
2208 	offset += segs * sizeof(struct elf_phdr);	/* Program headers */
2209 
2210 	/* Write notes phdr entry */
2211 	{
2212 		size_t sz = get_note_info_size(&info);
2213 
2214 		sz += elf_coredump_extra_notes_size();
2215 
2216 		phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2217 		if (!phdr4note)
2218 			goto end_coredump;
2219 
2220 		fill_elf_note_phdr(phdr4note, sz, offset);
2221 		offset += sz;
2222 	}
2223 
2224 	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2225 
2226 	if (segs - 1 > ULONG_MAX / sizeof(*vma_filesz))
2227 		goto end_coredump;
2228 	vma_filesz = vmalloc((segs - 1) * sizeof(*vma_filesz));
2229 	if (!vma_filesz)
2230 		goto end_coredump;
2231 
2232 	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2233 			vma = next_vma(vma, gate_vma)) {
2234 		unsigned long dump_size;
2235 
2236 		dump_size = vma_dump_size(vma, cprm->mm_flags);
2237 		vma_filesz[i++] = dump_size;
2238 		vma_data_size += dump_size;
2239 	}
2240 
2241 	offset += vma_data_size;
2242 	offset += elf_core_extra_data_size();
2243 	e_shoff = offset;
2244 
2245 	if (e_phnum == PN_XNUM) {
2246 		shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2247 		if (!shdr4extnum)
2248 			goto end_coredump;
2249 		fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
2250 	}
2251 
2252 	offset = dataoff;
2253 
2254 	if (!dump_emit(cprm, elf, sizeof(*elf)))
2255 		goto end_coredump;
2256 
2257 	if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
2258 		goto end_coredump;
2259 
2260 	/* Write program headers for segments dump */
2261 	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2262 			vma = next_vma(vma, gate_vma)) {
2263 		struct elf_phdr phdr;
2264 
2265 		phdr.p_type = PT_LOAD;
2266 		phdr.p_offset = offset;
2267 		phdr.p_vaddr = vma->vm_start;
2268 		phdr.p_paddr = 0;
2269 		phdr.p_filesz = vma_filesz[i++];
2270 		phdr.p_memsz = vma->vm_end - vma->vm_start;
2271 		offset += phdr.p_filesz;
2272 		phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2273 		if (vma->vm_flags & VM_WRITE)
2274 			phdr.p_flags |= PF_W;
2275 		if (vma->vm_flags & VM_EXEC)
2276 			phdr.p_flags |= PF_X;
2277 		phdr.p_align = ELF_EXEC_PAGESIZE;
2278 
2279 		if (!dump_emit(cprm, &phdr, sizeof(phdr)))
2280 			goto end_coredump;
2281 	}
2282 
2283 	if (!elf_core_write_extra_phdrs(cprm, offset))
2284 		goto end_coredump;
2285 
2286 	/* Write out the notes section */
2287 	if (!write_note_info(&info, cprm))
2288 		goto end_coredump;
2289 
2290 	if (elf_coredump_extra_notes_write(cprm))
2291 		goto end_coredump;
2292 
2293 	/* Align to page */
2294 	if (!dump_skip(cprm, dataoff - cprm->pos))
2295 		goto end_coredump;
2296 
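	/*
	 * Finally the vma contents: for each vma emit the first vma_filesz[i]
	 * bytes page by page; pages that cannot be fetched become file holes
	 * via dump_skip().
	 */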
2297 	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2298 			vma = next_vma(vma, gate_vma)) {
2299 		unsigned long addr;
2300 		unsigned long end;
2301 
2302 		end = vma->vm_start + vma_filesz[i++];
2303 
2304 		for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2305 			struct page *page;
2306 			int stop;
2307 
2308 			page = get_dump_page(addr);
2309 			if (page) {
2310 				void *kaddr = kmap(page);
2311 				stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
2312 				kunmap(page);
2313 				put_page(page);
2314 			} else
2315 				stop = !dump_skip(cprm, PAGE_SIZE);
2316 			if (stop)
2317 				goto end_coredump;
2318 		}
2319 	}
2320 	dump_truncate(cprm);
2321 
2322 	if (!elf_core_write_extra_data(cprm))
2323 		goto end_coredump;
2324 
2325 	if (e_phnum == PN_XNUM) {
2326 		if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
2327 			goto end_coredump;
2328 	}
2329 
2330 end_coredump:
2331 	set_fs(fs);
2332 
2333 cleanup:
2334 	free_note_info(&info);
2335 	kfree(shdr4extnum);
2336 	vfree(vma_filesz);
2337 	kfree(phdr4note);
2338 	kfree(elf);
2339 out:
2340 	return has_dumped;
2341 }
2342 
2343 #endif		/* CONFIG_ELF_CORE */
2344 
2345 static int __init init_elf_binfmt(void)
2346 {
2347 	register_binfmt(&elf_format);
2348 	return 0;
2349 }
2350 
2351 static void __exit exit_elf_binfmt(void)
2352 {
2353 	/* Remove the ELF loader. */
2354 	unregister_binfmt(&elf_format);
2355 }
2356 
2357 core_initcall(init_elf_binfmt);
2358 module_exit(exit_elf_binfmt);
2359 MODULE_LICENSE("GPL");
2360