xref: /linux/fs/binfmt_elf.c (revision 827634added7f38b7d724cab1dccdb2b004c13c3)
1 /*
2  * linux/fs/binfmt_elf.c
3  *
4  * These are the functions used to load ELF format executables as used
5  * on SVr4 machines.  Information on the format may be found in the book
6  * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
7  * Tools".
8  *
9  * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
10  */
11 
12 #include <linux/module.h>
13 #include <linux/kernel.h>
14 #include <linux/fs.h>
15 #include <linux/mm.h>
16 #include <linux/mman.h>
17 #include <linux/errno.h>
18 #include <linux/signal.h>
19 #include <linux/binfmts.h>
20 #include <linux/string.h>
21 #include <linux/file.h>
22 #include <linux/slab.h>
23 #include <linux/personality.h>
24 #include <linux/elfcore.h>
25 #include <linux/init.h>
26 #include <linux/highuid.h>
27 #include <linux/compiler.h>
28 #include <linux/highmem.h>
29 #include <linux/pagemap.h>
30 #include <linux/vmalloc.h>
31 #include <linux/security.h>
32 #include <linux/random.h>
33 #include <linux/elf.h>
34 #include <linux/elf-randomize.h>
35 #include <linux/utsname.h>
36 #include <linux/coredump.h>
37 #include <linux/sched.h>
38 #include <asm/uaccess.h>
39 #include <asm/param.h>
40 #include <asm/page.h>
41 
42 #ifndef user_long_t
43 #define user_long_t long
44 #endif
45 #ifndef user_siginfo_t
46 #define user_siginfo_t siginfo_t
47 #endif
48 
49 static int load_elf_binary(struct linux_binprm *bprm);
50 static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
51 				int, int, unsigned long);
52 
53 #ifdef CONFIG_USELIB
54 static int load_elf_library(struct file *);
55 #else
56 #define load_elf_library NULL
57 #endif
58 
59 /*
60  * If we don't support core dumping, then supply a NULL so we
61  * don't even try.
62  */
63 #ifdef CONFIG_ELF_CORE
64 static int elf_core_dump(struct coredump_params *cprm);
65 #else
66 #define elf_core_dump	NULL
67 #endif
68 
69 #if ELF_EXEC_PAGESIZE > PAGE_SIZE
70 #define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
71 #else
72 #define ELF_MIN_ALIGN	PAGE_SIZE
73 #endif
74 
75 #ifndef ELF_CORE_EFLAGS
76 #define ELF_CORE_EFLAGS	0
77 #endif
78 
79 #define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
80 #define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
81 #define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
82 
83 static struct linux_binfmt elf_format = {
84 	.module		= THIS_MODULE,
85 	.load_binary	= load_elf_binary,
86 	.load_shlib	= load_elf_library,
87 	.core_dump	= elf_core_dump,
88 	.min_coredump	= ELF_EXEC_PAGESIZE,
89 };
90 
91 #define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
92 
93 static int set_brk(unsigned long start, unsigned long end)
94 {
95 	start = ELF_PAGEALIGN(start);
96 	end = ELF_PAGEALIGN(end);
97 	if (end > start) {
98 		unsigned long addr;
99 		addr = vm_brk(start, end - start);
100 		if (BAD_ADDR(addr))
101 			return addr;
102 	}
103 	current->mm->start_brk = current->mm->brk = end;
104 	return 0;
105 }
106 
107 /* We need to explicitly zero any fractional pages
108    after the data section (i.e. bss).  This would
109    contain the junk from the file that should not
110    be in memory
111  */
112 static int padzero(unsigned long elf_bss)
113 {
114 	unsigned long nbyte;
115 
116 	nbyte = ELF_PAGEOFFSET(elf_bss);
117 	if (nbyte) {
118 		nbyte = ELF_MIN_ALIGN - nbyte;
119 		if (clear_user((void __user *) elf_bss, nbyte))
120 			return -EFAULT;
121 	}
122 	return 0;
123 }
124 
125 /* Let's use some macros to make this stack manipulation a little clearer */
126 #ifdef CONFIG_STACK_GROWSUP
127 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
128 #define STACK_ROUND(sp, items) \
129 	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
130 #define STACK_ALLOC(sp, len) ({ \
131 	elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
132 	old_sp; })
133 #else
134 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
135 #define STACK_ROUND(sp, items) \
136 	(((unsigned long) (sp - items)) &~ 15UL)
137 #define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
138 #endif
139 
140 #ifndef ELF_BASE_PLATFORM
141 /*
142  * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
143  * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
144  * will be copied to the user stack in the same manner as AT_PLATFORM.
145  */
146 #define ELF_BASE_PLATFORM NULL
147 #endif
148 
149 static int
150 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
151 		unsigned long load_addr, unsigned long interp_load_addr)
152 {
153 	unsigned long p = bprm->p;
154 	int argc = bprm->argc;
155 	int envc = bprm->envc;
156 	elf_addr_t __user *argv;
157 	elf_addr_t __user *envp;
158 	elf_addr_t __user *sp;
159 	elf_addr_t __user *u_platform;
160 	elf_addr_t __user *u_base_platform;
161 	elf_addr_t __user *u_rand_bytes;
162 	const char *k_platform = ELF_PLATFORM;
163 	const char *k_base_platform = ELF_BASE_PLATFORM;
164 	unsigned char k_rand_bytes[16];
165 	int items;
166 	elf_addr_t *elf_info;
167 	int ei_index = 0;
168 	const struct cred *cred = current_cred();
169 	struct vm_area_struct *vma;
170 
171 	/*
172 	 * In some cases (e.g. Hyper-Threading), we want to avoid L1
173 	 * evictions by the processes running on the same package. One
174 	 * thing we can do is to shuffle the initial stack for them.
175 	 */
176 
177 	p = arch_align_stack(p);
178 
179 	/*
180 	 * If this architecture has a platform capability string, copy it
181 	 * to userspace.  In some cases (Sparc), this info is impossible
182 	 * for userspace to get any other way, in others (i386) it is
183 	 * merely difficult.
184 	 */
185 	u_platform = NULL;
186 	if (k_platform) {
187 		size_t len = strlen(k_platform) + 1;
188 
189 		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
190 		if (__copy_to_user(u_platform, k_platform, len))
191 			return -EFAULT;
192 	}
193 
194 	/*
195 	 * If this architecture has a "base" platform capability
196 	 * string, copy it to userspace.
197 	 */
198 	u_base_platform = NULL;
199 	if (k_base_platform) {
200 		size_t len = strlen(k_base_platform) + 1;
201 
202 		u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
203 		if (__copy_to_user(u_base_platform, k_base_platform, len))
204 			return -EFAULT;
205 	}
206 
207 	/*
208 	 * Generate 16 random bytes for userspace PRNG seeding.
209 	 */
210 	get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
211 	u_rand_bytes = (elf_addr_t __user *)
212 		       STACK_ALLOC(p, sizeof(k_rand_bytes));
213 	if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
214 		return -EFAULT;
215 
216 	/* Create the ELF interpreter info */
217 	elf_info = (elf_addr_t *)current->mm->saved_auxv;
218 	/* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
219 #define NEW_AUX_ENT(id, val) \
220 	do { \
221 		elf_info[ei_index++] = id; \
222 		elf_info[ei_index++] = val; \
223 	} while (0)
224 
225 #ifdef ARCH_DLINFO
226 	/*
227 	 * ARCH_DLINFO must come first so PPC can do its special alignment of
228 	 * AUXV.
229 	 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
230 	 * ARCH_DLINFO changes
231 	 */
232 	ARCH_DLINFO;
233 #endif
234 	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
235 	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
236 	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
237 	NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
238 	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
239 	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
240 	NEW_AUX_ENT(AT_BASE, interp_load_addr);
241 	NEW_AUX_ENT(AT_FLAGS, 0);
242 	NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
243 	NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
244 	NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
245 	NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
246 	NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
247  	NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
248 	NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
249 #ifdef ELF_HWCAP2
250 	NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
251 #endif
252 	NEW_AUX_ENT(AT_EXECFN, bprm->exec);
253 	if (k_platform) {
254 		NEW_AUX_ENT(AT_PLATFORM,
255 			    (elf_addr_t)(unsigned long)u_platform);
256 	}
257 	if (k_base_platform) {
258 		NEW_AUX_ENT(AT_BASE_PLATFORM,
259 			    (elf_addr_t)(unsigned long)u_base_platform);
260 	}
261 	if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
262 		NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
263 	}
264 #undef NEW_AUX_ENT
265 	/* AT_NULL is zero; clear the rest too */
266 	memset(&elf_info[ei_index], 0,
267 	       sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
268 
269 	/* And advance past the AT_NULL entry.  */
270 	ei_index += 2;
271 
272 	sp = STACK_ADD(p, ei_index);
273 
274 	items = (argc + 1) + (envc + 1) + 1;
275 	bprm->p = STACK_ROUND(sp, items);
276 
277 	/* Point sp at the lowest address on the stack */
278 #ifdef CONFIG_STACK_GROWSUP
279 	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
280 	bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
281 #else
282 	sp = (elf_addr_t __user *)bprm->p;
283 #endif
284 
285 
286 	/*
287 	 * Grow the stack manually; some architectures have a limit on how
288 	 * far ahead a user-space access may be in order to grow the stack.
289 	 */
290 	vma = find_extend_vma(current->mm, bprm->p);
291 	if (!vma)
292 		return -EFAULT;
293 
294 	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
295 	if (__put_user(argc, sp++))
296 		return -EFAULT;
297 	argv = sp;
298 	envp = argv + argc + 1;
299 
300 	/* Populate argv and envp */
301 	p = current->mm->arg_end = current->mm->arg_start;
302 	while (argc-- > 0) {
303 		size_t len;
304 		if (__put_user((elf_addr_t)p, argv++))
305 			return -EFAULT;
306 		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
307 		if (!len || len > MAX_ARG_STRLEN)
308 			return -EINVAL;
309 		p += len;
310 	}
311 	if (__put_user(0, argv))
312 		return -EFAULT;
313 	current->mm->arg_end = current->mm->env_start = p;
314 	while (envc-- > 0) {
315 		size_t len;
316 		if (__put_user((elf_addr_t)p, envp++))
317 			return -EFAULT;
318 		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
319 		if (!len || len > MAX_ARG_STRLEN)
320 			return -EINVAL;
321 		p += len;
322 	}
323 	if (__put_user(0, envp))
324 		return -EFAULT;
325 	current->mm->env_end = p;
326 
327 	/* Put the elf_info on the stack in the right place.  */
328 	sp = (elf_addr_t __user *)envp + 1;
329 	if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
330 		return -EFAULT;
331 	return 0;
332 }
333 
334 #ifndef elf_map
335 
336 static unsigned long elf_map(struct file *filep, unsigned long addr,
337 		struct elf_phdr *eppnt, int prot, int type,
338 		unsigned long total_size)
339 {
340 	unsigned long map_addr;
341 	unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
342 	unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
343 	addr = ELF_PAGESTART(addr);
344 	size = ELF_PAGEALIGN(size);
345 
346 	/* mmap() will return -EINVAL if given a zero size, but a
347 	 * segment with zero filesize is perfectly valid */
348 	if (!size)
349 		return addr;
350 
351 	/*
352 	* total_size is the size of the ELF (interpreter) image.
353 	* The _first_ mmap needs to know the full size, otherwise
354 	* randomization might put this image into an overlapping
355 	* position with the ELF binary image. (since size < total_size)
356 	* So we first map the 'big' image - and unmap the remainder at
357 	* the end. (which unmap is needed for ELF images with holes.)
358 	*/
359 	if (total_size) {
360 		total_size = ELF_PAGEALIGN(total_size);
361 		map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
362 		if (!BAD_ADDR(map_addr))
363 			vm_munmap(map_addr+size, total_size-size);
364 	} else
365 		map_addr = vm_mmap(filep, addr, size, prot, type, off);
366 
367 	return(map_addr);
368 }
369 
370 #endif /* !elf_map */
371 
372 static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
373 {
374 	int i, first_idx = -1, last_idx = -1;
375 
376 	for (i = 0; i < nr; i++) {
377 		if (cmds[i].p_type == PT_LOAD) {
378 			last_idx = i;
379 			if (first_idx == -1)
380 				first_idx = i;
381 		}
382 	}
383 	if (first_idx == -1)
384 		return 0;
385 
386 	return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
387 				ELF_PAGESTART(cmds[first_idx].p_vaddr);
388 }
389 
390 /**
391  * load_elf_phdrs() - load ELF program headers
392  * @elf_ex:   ELF header of the binary whose program headers should be loaded
393  * @elf_file: the opened ELF binary file
394  *
395  * Loads ELF program headers from the binary file elf_file, which has the ELF
396  * header pointed to by elf_ex, into a newly allocated array. The caller is
397  * responsible for freeing the allocated data. Returns an ERR_PTR upon failure.
398  */
399 static struct elf_phdr *load_elf_phdrs(struct elfhdr *elf_ex,
400 				       struct file *elf_file)
401 {
402 	struct elf_phdr *elf_phdata = NULL;
403 	int retval, size, err = -1;
404 
405 	/*
406 	 * If the size of this structure has changed, then punt, since
407 	 * we will be doing the wrong thing.
408 	 */
409 	if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
410 		goto out;
411 
412 	/* Sanity check the number of program headers... */
413 	if (elf_ex->e_phnum < 1 ||
414 		elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
415 		goto out;
416 
417 	/* ...and their total size. */
418 	size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
419 	if (size > ELF_MIN_ALIGN)
420 		goto out;
421 
422 	elf_phdata = kmalloc(size, GFP_KERNEL);
423 	if (!elf_phdata)
424 		goto out;
425 
426 	/* Read in the program headers */
427 	retval = kernel_read(elf_file, elf_ex->e_phoff,
428 			     (char *)elf_phdata, size);
429 	if (retval != size) {
430 		err = (retval < 0) ? retval : -EIO;
431 		goto out;
432 	}
433 
434 	/* Success! */
435 	err = 0;
436 out:
437 	if (err) {
438 		kfree(elf_phdata);
439 		elf_phdata = NULL;
440 	}
441 	return elf_phdata;
442 }
443 
444 #ifndef CONFIG_ARCH_BINFMT_ELF_STATE
445 
446 /**
447  * struct arch_elf_state - arch-specific ELF loading state
448  *
449  * This structure is used to preserve architecture specific data during
450  * the loading of an ELF file, throughout the checking of architecture
451  * specific ELF headers & through to the point where the ELF load is
452  * known to be proceeding (ie. SET_PERSONALITY).
453  *
454  * This implementation is a dummy for architectures which require no
455  * specific state.
456  */
457 struct arch_elf_state {
458 };
459 
460 #define INIT_ARCH_ELF_STATE {}
461 
462 /**
463  * arch_elf_pt_proc() - check a PT_LOPROC..PT_HIPROC ELF program header
464  * @ehdr:	The main ELF header
465  * @phdr:	The program header to check
466  * @elf:	The open ELF file
467  * @is_interp:	True if the phdr is from the interpreter of the ELF being
468  *		loaded, else false.
469  * @state:	Architecture-specific state preserved throughout the process
470  *		of loading the ELF.
471  *
472  * Inspects the program header phdr to validate its correctness and/or
473  * suitability for the system. Called once per ELF program header in the
474  * range PT_LOPROC to PT_HIPROC, for both the ELF being loaded and its
475  * interpreter.
476  *
477  * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
478  *         with that return code.
479  */
480 static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
481 				   struct elf_phdr *phdr,
482 				   struct file *elf, bool is_interp,
483 				   struct arch_elf_state *state)
484 {
485 	/* Dummy implementation, always proceed */
486 	return 0;
487 }
488 
489 /**
490  * arch_check_elf() - check a PT_LOPROC..PT_HIPROC ELF program header
491  * @ehdr:	The main ELF header
492  * @has_interp:	True if the ELF has an interpreter, else false.
493  * @state:	Architecture-specific state preserved throughout the process
494  *		of loading the ELF.
495  *
496  * Provides a final opportunity for architecture code to reject the loading
497  * of the ELF & cause an exec syscall to return an error. This is called after
498  * all program headers to be checked by arch_elf_pt_proc have been.
499  *
500  * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
501  *         with that return code.
502  */
503 static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
504 				 struct arch_elf_state *state)
505 {
506 	/* Dummy implementation, always proceed */
507 	return 0;
508 }
509 
510 #endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */
511 
512 /* This is much more generalized than the library routine read function,
513    so we keep this separate.  Technically the library read function
514    is only provided so that we can read a.out libraries that have
515    an ELF header */
516 
517 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
518 		struct file *interpreter, unsigned long *interp_map_addr,
519 		unsigned long no_base, struct elf_phdr *interp_elf_phdata)
520 {
521 	struct elf_phdr *eppnt;
522 	unsigned long load_addr = 0;
523 	int load_addr_set = 0;
524 	unsigned long last_bss = 0, elf_bss = 0;
525 	unsigned long error = ~0UL;
526 	unsigned long total_size;
527 	int i;
528 
529 	/* First of all, some simple consistency checks */
530 	if (interp_elf_ex->e_type != ET_EXEC &&
531 	    interp_elf_ex->e_type != ET_DYN)
532 		goto out;
533 	if (!elf_check_arch(interp_elf_ex))
534 		goto out;
535 	if (!interpreter->f_op->mmap)
536 		goto out;
537 
538 	total_size = total_mapping_size(interp_elf_phdata,
539 					interp_elf_ex->e_phnum);
540 	if (!total_size) {
541 		error = -EINVAL;
542 		goto out;
543 	}
544 
545 	eppnt = interp_elf_phdata;
546 	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
547 		if (eppnt->p_type == PT_LOAD) {
548 			int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
549 			int elf_prot = 0;
550 			unsigned long vaddr = 0;
551 			unsigned long k, map_addr;
552 
553 			if (eppnt->p_flags & PF_R)
554 		    		elf_prot = PROT_READ;
555 			if (eppnt->p_flags & PF_W)
556 				elf_prot |= PROT_WRITE;
557 			if (eppnt->p_flags & PF_X)
558 				elf_prot |= PROT_EXEC;
559 			vaddr = eppnt->p_vaddr;
560 			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
561 				elf_type |= MAP_FIXED;
562 			else if (no_base && interp_elf_ex->e_type == ET_DYN)
563 				load_addr = -vaddr;
564 
565 			map_addr = elf_map(interpreter, load_addr + vaddr,
566 					eppnt, elf_prot, elf_type, total_size);
567 			total_size = 0;
568 			if (!*interp_map_addr)
569 				*interp_map_addr = map_addr;
570 			error = map_addr;
571 			if (BAD_ADDR(map_addr))
572 				goto out;
573 
574 			if (!load_addr_set &&
575 			    interp_elf_ex->e_type == ET_DYN) {
576 				load_addr = map_addr - ELF_PAGESTART(vaddr);
577 				load_addr_set = 1;
578 			}
579 
580 			/*
581 			 * Check to see if the section's size will overflow the
582 			 * allowed task size. Note that p_filesz must always be
583 			 * <= p_memsize so it's only necessary to check p_memsz.
584 			 */
585 			k = load_addr + eppnt->p_vaddr;
586 			if (BAD_ADDR(k) ||
587 			    eppnt->p_filesz > eppnt->p_memsz ||
588 			    eppnt->p_memsz > TASK_SIZE ||
589 			    TASK_SIZE - eppnt->p_memsz < k) {
590 				error = -ENOMEM;
591 				goto out;
592 			}
593 
594 			/*
595 			 * Find the end of the file mapping for this phdr, and
596 			 * keep track of the largest address we see for this.
597 			 */
598 			k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
599 			if (k > elf_bss)
600 				elf_bss = k;
601 
602 			/*
603 			 * Do the same thing for the memory mapping - between
604 			 * elf_bss and last_bss is the bss section.
605 			 */
606 			k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
607 			if (k > last_bss)
608 				last_bss = k;
609 		}
610 	}
611 
612 	if (last_bss > elf_bss) {
613 		/*
614 		 * Now fill out the bss section.  First pad the last page up
615 		 * to the page boundary, and then perform a mmap to make sure
616 		 * that there are zero-mapped pages up to and including the
617 		 * last bss page.
618 		 */
619 		if (padzero(elf_bss)) {
620 			error = -EFAULT;
621 			goto out;
622 		}
623 
624 		/* What we have mapped so far */
625 		elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
626 
627 		/* Map the last of the bss segment */
628 		error = vm_brk(elf_bss, last_bss - elf_bss);
629 		if (BAD_ADDR(error))
630 			goto out;
631 	}
632 
633 	error = load_addr;
634 out:
635 	return error;
636 }
637 
638 /*
639  * These are the functions used to load ELF style executables and shared
640  * libraries.  There is no binary dependent code anywhere else.
641  */
642 
643 #ifndef STACK_RND_MASK
644 #define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))	/* 8MB of VA */
645 #endif
646 
647 static unsigned long randomize_stack_top(unsigned long stack_top)
648 {
649 	unsigned long random_variable = 0;
650 
651 	if ((current->flags & PF_RANDOMIZE) &&
652 		!(current->personality & ADDR_NO_RANDOMIZE)) {
653 		random_variable = (unsigned long) get_random_int();
654 		random_variable &= STACK_RND_MASK;
655 		random_variable <<= PAGE_SHIFT;
656 	}
657 #ifdef CONFIG_STACK_GROWSUP
658 	return PAGE_ALIGN(stack_top) + random_variable;
659 #else
660 	return PAGE_ALIGN(stack_top) - random_variable;
661 #endif
662 }
663 
664 static int load_elf_binary(struct linux_binprm *bprm)
665 {
666 	struct file *interpreter = NULL; /* to shut gcc up */
667  	unsigned long load_addr = 0, load_bias = 0;
668 	int load_addr_set = 0;
669 	char * elf_interpreter = NULL;
670 	unsigned long error;
671 	struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
672 	unsigned long elf_bss, elf_brk;
673 	int retval, i;
674 	unsigned long elf_entry;
675 	unsigned long interp_load_addr = 0;
676 	unsigned long start_code, end_code, start_data, end_data;
677 	unsigned long reloc_func_desc __maybe_unused = 0;
678 	int executable_stack = EXSTACK_DEFAULT;
679 	struct pt_regs *regs = current_pt_regs();
680 	struct {
681 		struct elfhdr elf_ex;
682 		struct elfhdr interp_elf_ex;
683 	} *loc;
684 	struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
685 
686 	loc = kmalloc(sizeof(*loc), GFP_KERNEL);
687 	if (!loc) {
688 		retval = -ENOMEM;
689 		goto out_ret;
690 	}
691 
692 	/* Get the exec-header */
693 	loc->elf_ex = *((struct elfhdr *)bprm->buf);
694 
695 	retval = -ENOEXEC;
696 	/* First of all, some simple consistency checks */
697 	if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
698 		goto out;
699 
700 	if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
701 		goto out;
702 	if (!elf_check_arch(&loc->elf_ex))
703 		goto out;
704 	if (!bprm->file->f_op->mmap)
705 		goto out;
706 
707 	elf_phdata = load_elf_phdrs(&loc->elf_ex, bprm->file);
708 	if (!elf_phdata)
709 		goto out;
710 
711 	elf_ppnt = elf_phdata;
712 	elf_bss = 0;
713 	elf_brk = 0;
714 
715 	start_code = ~0UL;
716 	end_code = 0;
717 	start_data = 0;
718 	end_data = 0;
719 
720 	for (i = 0; i < loc->elf_ex.e_phnum; i++) {
721 		if (elf_ppnt->p_type == PT_INTERP) {
722 			/* This is the program interpreter used for
723 			 * shared libraries - for now assume that this
724 			 * is an a.out format binary
725 			 */
726 			retval = -ENOEXEC;
727 			if (elf_ppnt->p_filesz > PATH_MAX ||
728 			    elf_ppnt->p_filesz < 2)
729 				goto out_free_ph;
730 
731 			retval = -ENOMEM;
732 			elf_interpreter = kmalloc(elf_ppnt->p_filesz,
733 						  GFP_KERNEL);
734 			if (!elf_interpreter)
735 				goto out_free_ph;
736 
737 			retval = kernel_read(bprm->file, elf_ppnt->p_offset,
738 					     elf_interpreter,
739 					     elf_ppnt->p_filesz);
740 			if (retval != elf_ppnt->p_filesz) {
741 				if (retval >= 0)
742 					retval = -EIO;
743 				goto out_free_interp;
744 			}
745 			/* make sure path is NULL terminated */
746 			retval = -ENOEXEC;
747 			if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
748 				goto out_free_interp;
749 
750 			interpreter = open_exec(elf_interpreter);
751 			retval = PTR_ERR(interpreter);
752 			if (IS_ERR(interpreter))
753 				goto out_free_interp;
754 
755 			/*
756 			 * If the binary is not readable then enforce
757 			 * mm->dumpable = 0 regardless of the interpreter's
758 			 * permissions.
759 			 */
760 			would_dump(bprm, interpreter);
761 
762 			retval = kernel_read(interpreter, 0, bprm->buf,
763 					     BINPRM_BUF_SIZE);
764 			if (retval != BINPRM_BUF_SIZE) {
765 				if (retval >= 0)
766 					retval = -EIO;
767 				goto out_free_dentry;
768 			}
769 
770 			/* Get the exec headers */
771 			loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
772 			break;
773 		}
774 		elf_ppnt++;
775 	}
776 
777 	elf_ppnt = elf_phdata;
778 	for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
779 		switch (elf_ppnt->p_type) {
780 		case PT_GNU_STACK:
781 			if (elf_ppnt->p_flags & PF_X)
782 				executable_stack = EXSTACK_ENABLE_X;
783 			else
784 				executable_stack = EXSTACK_DISABLE_X;
785 			break;
786 
787 		case PT_LOPROC ... PT_HIPROC:
788 			retval = arch_elf_pt_proc(&loc->elf_ex, elf_ppnt,
789 						  bprm->file, false,
790 						  &arch_state);
791 			if (retval)
792 				goto out_free_dentry;
793 			break;
794 		}
795 
796 	/* Some simple consistency checks for the interpreter */
797 	if (elf_interpreter) {
798 		retval = -ELIBBAD;
799 		/* Not an ELF interpreter */
800 		if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
801 			goto out_free_dentry;
802 		/* Verify the interpreter has a valid arch */
803 		if (!elf_check_arch(&loc->interp_elf_ex))
804 			goto out_free_dentry;
805 
806 		/* Load the interpreter program headers */
807 		interp_elf_phdata = load_elf_phdrs(&loc->interp_elf_ex,
808 						   interpreter);
809 		if (!interp_elf_phdata)
810 			goto out_free_dentry;
811 
812 		/* Pass PT_LOPROC..PT_HIPROC headers to arch code */
813 		elf_ppnt = interp_elf_phdata;
814 		for (i = 0; i < loc->interp_elf_ex.e_phnum; i++, elf_ppnt++)
815 			switch (elf_ppnt->p_type) {
816 			case PT_LOPROC ... PT_HIPROC:
817 				retval = arch_elf_pt_proc(&loc->interp_elf_ex,
818 							  elf_ppnt, interpreter,
819 							  true, &arch_state);
820 				if (retval)
821 					goto out_free_dentry;
822 				break;
823 			}
824 	}
825 
826 	/*
827 	 * Allow arch code to reject the ELF at this point, whilst it's
828 	 * still possible to return an error to the code that invoked
829 	 * the exec syscall.
830 	 */
831 	retval = arch_check_elf(&loc->elf_ex, !!interpreter, &arch_state);
832 	if (retval)
833 		goto out_free_dentry;
834 
835 	/* Flush all traces of the currently running executable */
836 	retval = flush_old_exec(bprm);
837 	if (retval)
838 		goto out_free_dentry;
839 
840 	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
841 	   may depend on the personality.  */
842 	SET_PERSONALITY2(loc->elf_ex, &arch_state);
843 	if (elf_read_implies_exec(loc->elf_ex, executable_stack))
844 		current->personality |= READ_IMPLIES_EXEC;
845 
846 	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
847 		current->flags |= PF_RANDOMIZE;
848 
849 	setup_new_exec(bprm);
850 
851 	/* Do this so that we can load the interpreter, if need be.  We will
852 	   change some of these later */
853 	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
854 				 executable_stack);
855 	if (retval < 0)
856 		goto out_free_dentry;
857 
858 	current->mm->start_stack = bprm->p;
859 
860 	/* Now we do a little grungy work by mmapping the ELF image into
861 	   the correct location in memory. */
862 	for(i = 0, elf_ppnt = elf_phdata;
863 	    i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
864 		int elf_prot = 0, elf_flags;
865 		unsigned long k, vaddr;
866 		unsigned long total_size = 0;
867 
868 		if (elf_ppnt->p_type != PT_LOAD)
869 			continue;
870 
871 		if (unlikely (elf_brk > elf_bss)) {
872 			unsigned long nbyte;
873 
874 			/* There was a PT_LOAD segment with p_memsz > p_filesz
875 			   before this one. Map anonymous pages, if needed,
876 			   and clear the area.  */
877 			retval = set_brk(elf_bss + load_bias,
878 					 elf_brk + load_bias);
879 			if (retval)
880 				goto out_free_dentry;
881 			nbyte = ELF_PAGEOFFSET(elf_bss);
882 			if (nbyte) {
883 				nbyte = ELF_MIN_ALIGN - nbyte;
884 				if (nbyte > elf_brk - elf_bss)
885 					nbyte = elf_brk - elf_bss;
886 				if (clear_user((void __user *)elf_bss +
887 							load_bias, nbyte)) {
888 					/*
889 					 * This bss-zeroing can fail if the ELF
890 					 * file specifies odd protections. So
891 					 * we don't check the return value
892 					 */
893 				}
894 			}
895 		}
896 
897 		if (elf_ppnt->p_flags & PF_R)
898 			elf_prot |= PROT_READ;
899 		if (elf_ppnt->p_flags & PF_W)
900 			elf_prot |= PROT_WRITE;
901 		if (elf_ppnt->p_flags & PF_X)
902 			elf_prot |= PROT_EXEC;
903 
904 		elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
905 
906 		vaddr = elf_ppnt->p_vaddr;
907 		if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
908 			elf_flags |= MAP_FIXED;
909 		} else if (loc->elf_ex.e_type == ET_DYN) {
910 			/* Try and get dynamic programs out of the way of the
911 			 * default mmap base, as well as whatever program they
912 			 * might try to exec.  This is because the brk will
913 			 * follow the loader, and is not movable.  */
914 			load_bias = ELF_ET_DYN_BASE - vaddr;
915 			if (current->flags & PF_RANDOMIZE)
916 				load_bias += arch_mmap_rnd();
917 			load_bias = ELF_PAGESTART(load_bias);
918 			total_size = total_mapping_size(elf_phdata,
919 							loc->elf_ex.e_phnum);
920 			if (!total_size) {
921 				error = -EINVAL;
922 				goto out_free_dentry;
923 			}
924 		}
925 
926 		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
927 				elf_prot, elf_flags, total_size);
928 		if (BAD_ADDR(error)) {
929 			retval = IS_ERR((void *)error) ?
930 				PTR_ERR((void*)error) : -EINVAL;
931 			goto out_free_dentry;
932 		}
933 
934 		if (!load_addr_set) {
935 			load_addr_set = 1;
936 			load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
937 			if (loc->elf_ex.e_type == ET_DYN) {
938 				load_bias += error -
939 				             ELF_PAGESTART(load_bias + vaddr);
940 				load_addr += load_bias;
941 				reloc_func_desc = load_bias;
942 			}
943 		}
944 		k = elf_ppnt->p_vaddr;
945 		if (k < start_code)
946 			start_code = k;
947 		if (start_data < k)
948 			start_data = k;
949 
950 		/*
951 		 * Check to see if the section's size will overflow the
952 		 * allowed task size. Note that p_filesz must always be
953 		 * <= p_memsz so it is only necessary to check p_memsz.
954 		 */
955 		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
956 		    elf_ppnt->p_memsz > TASK_SIZE ||
957 		    TASK_SIZE - elf_ppnt->p_memsz < k) {
958 			/* set_brk can never work. Avoid overflows. */
959 			retval = -EINVAL;
960 			goto out_free_dentry;
961 		}
962 
963 		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
964 
965 		if (k > elf_bss)
966 			elf_bss = k;
967 		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
968 			end_code = k;
969 		if (end_data < k)
970 			end_data = k;
971 		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
972 		if (k > elf_brk)
973 			elf_brk = k;
974 	}
975 
976 	loc->elf_ex.e_entry += load_bias;
977 	elf_bss += load_bias;
978 	elf_brk += load_bias;
979 	start_code += load_bias;
980 	end_code += load_bias;
981 	start_data += load_bias;
982 	end_data += load_bias;
983 
984 	/* Calling set_brk effectively mmaps the pages that we need
985 	 * for the bss and break sections.  We must do this before
986 	 * mapping in the interpreter, to make sure it doesn't wind
987 	 * up getting placed where the bss needs to go.
988 	 */
989 	retval = set_brk(elf_bss, elf_brk);
990 	if (retval)
991 		goto out_free_dentry;
992 	if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
993 		retval = -EFAULT; /* Nobody gets to see this, but.. */
994 		goto out_free_dentry;
995 	}
996 
997 	if (elf_interpreter) {
998 		unsigned long interp_map_addr = 0;
999 
1000 		elf_entry = load_elf_interp(&loc->interp_elf_ex,
1001 					    interpreter,
1002 					    &interp_map_addr,
1003 					    load_bias, interp_elf_phdata);
1004 		if (!IS_ERR((void *)elf_entry)) {
1005 			/*
1006 			 * load_elf_interp() returns relocation
1007 			 * adjustment
1008 			 */
1009 			interp_load_addr = elf_entry;
1010 			elf_entry += loc->interp_elf_ex.e_entry;
1011 		}
1012 		if (BAD_ADDR(elf_entry)) {
1013 			retval = IS_ERR((void *)elf_entry) ?
1014 					(int)elf_entry : -EINVAL;
1015 			goto out_free_dentry;
1016 		}
1017 		reloc_func_desc = interp_load_addr;
1018 
1019 		allow_write_access(interpreter);
1020 		fput(interpreter);
1021 		kfree(elf_interpreter);
1022 	} else {
1023 		elf_entry = loc->elf_ex.e_entry;
1024 		if (BAD_ADDR(elf_entry)) {
1025 			retval = -EINVAL;
1026 			goto out_free_dentry;
1027 		}
1028 	}
1029 
1030 	kfree(interp_elf_phdata);
1031 	kfree(elf_phdata);
1032 
1033 	set_binfmt(&elf_format);
1034 
1035 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
1036 	retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
1037 	if (retval < 0)
1038 		goto out;
1039 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
1040 
1041 	install_exec_creds(bprm);
1042 	retval = create_elf_tables(bprm, &loc->elf_ex,
1043 			  load_addr, interp_load_addr);
1044 	if (retval < 0)
1045 		goto out;
1046 	/* N.B. passed_fileno might not be initialized? */
1047 	current->mm->end_code = end_code;
1048 	current->mm->start_code = start_code;
1049 	current->mm->start_data = start_data;
1050 	current->mm->end_data = end_data;
1051 	current->mm->start_stack = bprm->p;
1052 
1053 	if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
1054 		current->mm->brk = current->mm->start_brk =
1055 			arch_randomize_brk(current->mm);
1056 #ifdef compat_brk_randomized
1057 		current->brk_randomized = 1;
1058 #endif
1059 	}
1060 
1061 	if (current->personality & MMAP_PAGE_ZERO) {
1062 		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
1063 		   and some applications "depend" upon this behavior.
1064 		   Since we do not have the power to recompile these, we
1065 		   emulate the SVr4 behavior. Sigh. */
1066 		error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
1067 				MAP_FIXED | MAP_PRIVATE, 0);
1068 	}
1069 
1070 #ifdef ELF_PLAT_INIT
1071 	/*
1072 	 * The ABI may specify that certain registers be set up in special
1073 	 * ways (on i386 %edx is the address of a DT_FINI function, for
1074 	 * example.  In addition, it may also specify (eg, PowerPC64 ELF)
1075 	 * that the e_entry field is the address of the function descriptor
1076 	 * for the startup routine, rather than the address of the startup
1077 	 * routine itself.  This macro performs whatever initialization to
1078 	 * the regs structure is required as well as any relocations to the
1079 	 * function descriptor entries when executing dynamically links apps.
1080 	 */
1081 	ELF_PLAT_INIT(regs, reloc_func_desc);
1082 #endif
1083 
1084 	start_thread(regs, elf_entry, bprm->p);
1085 	retval = 0;
1086 out:
1087 	kfree(loc);
1088 out_ret:
1089 	return retval;
1090 
1091 	/* error cleanup */
1092 out_free_dentry:
1093 	kfree(interp_elf_phdata);
1094 	allow_write_access(interpreter);
1095 	if (interpreter)
1096 		fput(interpreter);
1097 out_free_interp:
1098 	kfree(elf_interpreter);
1099 out_free_ph:
1100 	kfree(elf_phdata);
1101 	goto out;
1102 }
1103 
1104 #ifdef CONFIG_USELIB
1105 /* This is really simpleminded and specialized - we are loading an
1106    a.out library that is given an ELF header. */
1107 static int load_elf_library(struct file *file)
1108 {
1109 	struct elf_phdr *elf_phdata;
1110 	struct elf_phdr *eppnt;
1111 	unsigned long elf_bss, bss, len;
1112 	int retval, error, i, j;
1113 	struct elfhdr elf_ex;
1114 
1115 	error = -ENOEXEC;
1116 	retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1117 	if (retval != sizeof(elf_ex))
1118 		goto out;
1119 
1120 	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1121 		goto out;
1122 
1123 	/* First of all, some simple consistency checks */
1124 	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1125 	    !elf_check_arch(&elf_ex) || !file->f_op->mmap)
1126 		goto out;
1127 
1128 	/* Now read in all of the header information */
1129 
1130 	j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1131 	/* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1132 
1133 	error = -ENOMEM;
1134 	elf_phdata = kmalloc(j, GFP_KERNEL);
1135 	if (!elf_phdata)
1136 		goto out;
1137 
1138 	eppnt = elf_phdata;
1139 	error = -ENOEXEC;
1140 	retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1141 	if (retval != j)
1142 		goto out_free_ph;
1143 
1144 	for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1145 		if ((eppnt + i)->p_type == PT_LOAD)
1146 			j++;
1147 	if (j != 1)
1148 		goto out_free_ph;
1149 
1150 	while (eppnt->p_type != PT_LOAD)
1151 		eppnt++;
1152 
1153 	/* Now use mmap to map the library into memory. */
1154 	error = vm_mmap(file,
1155 			ELF_PAGESTART(eppnt->p_vaddr),
1156 			(eppnt->p_filesz +
1157 			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
1158 			PROT_READ | PROT_WRITE | PROT_EXEC,
1159 			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1160 			(eppnt->p_offset -
1161 			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
1162 	if (error != ELF_PAGESTART(eppnt->p_vaddr))
1163 		goto out_free_ph;
1164 
1165 	elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1166 	if (padzero(elf_bss)) {
1167 		error = -EFAULT;
1168 		goto out_free_ph;
1169 	}
1170 
1171 	len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1172 			    ELF_MIN_ALIGN - 1);
1173 	bss = eppnt->p_memsz + eppnt->p_vaddr;
1174 	if (bss > len)
1175 		vm_brk(len, bss - len);
1176 	error = 0;
1177 
1178 out_free_ph:
1179 	kfree(elf_phdata);
1180 out:
1181 	return error;
1182 }
1183 #endif /* #ifdef CONFIG_USELIB */
1184 
1185 #ifdef CONFIG_ELF_CORE
1186 /*
1187  * ELF core dumper
1188  *
1189  * Modelled on fs/exec.c:aout_core_dump()
1190  * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1191  */
1192 
1193 /*
1194  * The purpose of always_dump_vma() is to make sure that special kernel mappings
1195  * that are useful for post-mortem analysis are included in every core dump.
1196  * In that way we ensure that the core dump is fully interpretable later
1197  * without matching up the same kernel and hardware config to see what PC values
1198  * meant. These special mappings include - vDSO, vsyscall, and other
1199  * architecture specific mappings
1200  */
1201 static bool always_dump_vma(struct vm_area_struct *vma)
1202 {
1203 	/* Any vsyscall mappings? */
1204 	if (vma == get_gate_vma(vma->vm_mm))
1205 		return true;
1206 
1207 	/*
1208 	 * Assume that all vmas with a .name op should always be dumped.
1209 	 * If this changes, a new vm_ops field can easily be added.
1210 	 */
1211 	if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma))
1212 		return true;
1213 
1214 	/*
1215 	 * arch_vma_name() returns non-NULL for special architecture mappings,
1216 	 * such as vDSO sections.
1217 	 */
1218 	if (arch_vma_name(vma))
1219 		return true;
1220 
1221 	return false;
1222 }
1223 
1224 /*
1225  * Decide what to dump of a segment, part, all or none.
1226  */
1227 static unsigned long vma_dump_size(struct vm_area_struct *vma,
1228 				   unsigned long mm_flags)
1229 {
1230 #define FILTER(type)	(mm_flags & (1UL << MMF_DUMP_##type))
1231 
1232 	/* always dump the vdso and vsyscall sections */
1233 	if (always_dump_vma(vma))
1234 		goto whole;
1235 
1236 	if (vma->vm_flags & VM_DONTDUMP)
1237 		return 0;
1238 
1239 	/* Hugetlb memory check */
1240 	if (vma->vm_flags & VM_HUGETLB) {
1241 		if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1242 			goto whole;
1243 		if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1244 			goto whole;
1245 		return 0;
1246 	}
1247 
1248 	/* Do not dump I/O mapped devices or special mappings */
1249 	if (vma->vm_flags & VM_IO)
1250 		return 0;
1251 
1252 	/* By default, dump shared memory if mapped from an anonymous file. */
1253 	if (vma->vm_flags & VM_SHARED) {
1254 		if (file_inode(vma->vm_file)->i_nlink == 0 ?
1255 		    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1256 			goto whole;
1257 		return 0;
1258 	}
1259 
1260 	/* Dump segments that have been written to.  */
1261 	if (vma->anon_vma && FILTER(ANON_PRIVATE))
1262 		goto whole;
1263 	if (vma->vm_file == NULL)
1264 		return 0;
1265 
1266 	if (FILTER(MAPPED_PRIVATE))
1267 		goto whole;
1268 
1269 	/*
1270 	 * If this looks like the beginning of a DSO or executable mapping,
1271 	 * check for an ELF header.  If we find one, dump the first page to
1272 	 * aid in determining what was mapped here.
1273 	 */
1274 	if (FILTER(ELF_HEADERS) &&
1275 	    vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
1276 		u32 __user *header = (u32 __user *) vma->vm_start;
1277 		u32 word;
1278 		mm_segment_t fs = get_fs();
1279 		/*
1280 		 * Doing it this way gets the constant folded by GCC.
1281 		 */
1282 		union {
1283 			u32 cmp;
1284 			char elfmag[SELFMAG];
1285 		} magic;
1286 		BUILD_BUG_ON(SELFMAG != sizeof word);
1287 		magic.elfmag[EI_MAG0] = ELFMAG0;
1288 		magic.elfmag[EI_MAG1] = ELFMAG1;
1289 		magic.elfmag[EI_MAG2] = ELFMAG2;
1290 		magic.elfmag[EI_MAG3] = ELFMAG3;
1291 		/*
1292 		 * Switch to the user "segment" for get_user(),
1293 		 * then put back what elf_core_dump() had in place.
1294 		 */
1295 		set_fs(USER_DS);
1296 		if (unlikely(get_user(word, header)))
1297 			word = 0;
1298 		set_fs(fs);
1299 		if (word == magic.cmp)
1300 			return PAGE_SIZE;
1301 	}
1302 
1303 #undef	FILTER
1304 
1305 	return 0;
1306 
1307 whole:
1308 	return vma->vm_end - vma->vm_start;
1309 }
1310 
1311 /* An ELF note in memory */
1312 struct memelfnote
1313 {
1314 	const char *name;
1315 	int type;
1316 	unsigned int datasz;
1317 	void *data;
1318 };
1319 
1320 static int notesize(struct memelfnote *en)
1321 {
1322 	int sz;
1323 
1324 	sz = sizeof(struct elf_note);
1325 	sz += roundup(strlen(en->name) + 1, 4);
1326 	sz += roundup(en->datasz, 4);
1327 
1328 	return sz;
1329 }
1330 
1331 static int writenote(struct memelfnote *men, struct coredump_params *cprm)
1332 {
1333 	struct elf_note en;
1334 	en.n_namesz = strlen(men->name) + 1;
1335 	en.n_descsz = men->datasz;
1336 	en.n_type = men->type;
1337 
1338 	return dump_emit(cprm, &en, sizeof(en)) &&
1339 	    dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
1340 	    dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
1341 }
1342 
1343 static void fill_elf_header(struct elfhdr *elf, int segs,
1344 			    u16 machine, u32 flags)
1345 {
1346 	memset(elf, 0, sizeof(*elf));
1347 
1348 	memcpy(elf->e_ident, ELFMAG, SELFMAG);
1349 	elf->e_ident[EI_CLASS] = ELF_CLASS;
1350 	elf->e_ident[EI_DATA] = ELF_DATA;
1351 	elf->e_ident[EI_VERSION] = EV_CURRENT;
1352 	elf->e_ident[EI_OSABI] = ELF_OSABI;
1353 
1354 	elf->e_type = ET_CORE;
1355 	elf->e_machine = machine;
1356 	elf->e_version = EV_CURRENT;
1357 	elf->e_phoff = sizeof(struct elfhdr);
1358 	elf->e_flags = flags;
1359 	elf->e_ehsize = sizeof(struct elfhdr);
1360 	elf->e_phentsize = sizeof(struct elf_phdr);
1361 	elf->e_phnum = segs;
1362 
1363 	return;
1364 }
1365 
1366 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1367 {
1368 	phdr->p_type = PT_NOTE;
1369 	phdr->p_offset = offset;
1370 	phdr->p_vaddr = 0;
1371 	phdr->p_paddr = 0;
1372 	phdr->p_filesz = sz;
1373 	phdr->p_memsz = 0;
1374 	phdr->p_flags = 0;
1375 	phdr->p_align = 0;
1376 	return;
1377 }
1378 
1379 static void fill_note(struct memelfnote *note, const char *name, int type,
1380 		unsigned int sz, void *data)
1381 {
1382 	note->name = name;
1383 	note->type = type;
1384 	note->datasz = sz;
1385 	note->data = data;
1386 	return;
1387 }
1388 
1389 /*
1390  * fill up all the fields in prstatus from the given task struct, except
1391  * registers which need to be filled up separately.
1392  */
1393 static void fill_prstatus(struct elf_prstatus *prstatus,
1394 		struct task_struct *p, long signr)
1395 {
1396 	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1397 	prstatus->pr_sigpend = p->pending.signal.sig[0];
1398 	prstatus->pr_sighold = p->blocked.sig[0];
1399 	rcu_read_lock();
1400 	prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1401 	rcu_read_unlock();
1402 	prstatus->pr_pid = task_pid_vnr(p);
1403 	prstatus->pr_pgrp = task_pgrp_vnr(p);
1404 	prstatus->pr_sid = task_session_vnr(p);
1405 	if (thread_group_leader(p)) {
1406 		struct task_cputime cputime;
1407 
1408 		/*
1409 		 * This is the record for the group leader.  It shows the
1410 		 * group-wide total, not its individual thread total.
1411 		 */
1412 		thread_group_cputime(p, &cputime);
1413 		cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1414 		cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1415 	} else {
1416 		cputime_t utime, stime;
1417 
1418 		task_cputime(p, &utime, &stime);
1419 		cputime_to_timeval(utime, &prstatus->pr_utime);
1420 		cputime_to_timeval(stime, &prstatus->pr_stime);
1421 	}
1422 	cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1423 	cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1424 }
1425 
1426 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1427 		       struct mm_struct *mm)
1428 {
1429 	const struct cred *cred;
1430 	unsigned int i, len;
1431 
1432 	/* first copy the parameters from user space */
1433 	memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1434 
1435 	len = mm->arg_end - mm->arg_start;
1436 	if (len >= ELF_PRARGSZ)
1437 		len = ELF_PRARGSZ-1;
1438 	if (copy_from_user(&psinfo->pr_psargs,
1439 		           (const char __user *)mm->arg_start, len))
1440 		return -EFAULT;
1441 	for(i = 0; i < len; i++)
1442 		if (psinfo->pr_psargs[i] == 0)
1443 			psinfo->pr_psargs[i] = ' ';
1444 	psinfo->pr_psargs[len] = 0;
1445 
1446 	rcu_read_lock();
1447 	psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1448 	rcu_read_unlock();
1449 	psinfo->pr_pid = task_pid_vnr(p);
1450 	psinfo->pr_pgrp = task_pgrp_vnr(p);
1451 	psinfo->pr_sid = task_session_vnr(p);
1452 
1453 	i = p->state ? ffz(~p->state) + 1 : 0;
1454 	psinfo->pr_state = i;
1455 	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1456 	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1457 	psinfo->pr_nice = task_nice(p);
1458 	psinfo->pr_flag = p->flags;
1459 	rcu_read_lock();
1460 	cred = __task_cred(p);
1461 	SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
1462 	SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
1463 	rcu_read_unlock();
1464 	strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1465 
1466 	return 0;
1467 }
1468 
1469 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1470 {
1471 	elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1472 	int i = 0;
1473 	do
1474 		i += 2;
1475 	while (auxv[i - 2] != AT_NULL);
1476 	fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1477 }
1478 
1479 static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1480 		const siginfo_t *siginfo)
1481 {
1482 	mm_segment_t old_fs = get_fs();
1483 	set_fs(KERNEL_DS);
1484 	copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
1485 	set_fs(old_fs);
1486 	fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
1487 }
1488 
1489 #define MAX_FILE_NOTE_SIZE (4*1024*1024)
1490 /*
1491  * Format of NT_FILE note:
1492  *
1493  * long count     -- how many files are mapped
1494  * long page_size -- units for file_ofs
1495  * array of [COUNT] elements of
1496  *   long start
1497  *   long end
1498  *   long file_ofs
1499  * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
1500  */
1501 static int fill_files_note(struct memelfnote *note)
1502 {
1503 	struct vm_area_struct *vma;
1504 	unsigned count, size, names_ofs, remaining, n;
1505 	user_long_t *data;
1506 	user_long_t *start_end_ofs;
1507 	char *name_base, *name_curpos;
1508 
1509 	/* *Estimated* file count and total data size needed */
1510 	count = current->mm->map_count;
1511 	size = count * 64;
1512 
1513 	names_ofs = (2 + 3 * count) * sizeof(data[0]);
1514  alloc:
1515 	if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1516 		return -EINVAL;
1517 	size = round_up(size, PAGE_SIZE);
1518 	data = vmalloc(size);
1519 	if (!data)
1520 		return -ENOMEM;
1521 
1522 	start_end_ofs = data + 2;
1523 	name_base = name_curpos = ((char *)data) + names_ofs;
1524 	remaining = size - names_ofs;
1525 	count = 0;
1526 	for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1527 		struct file *file;
1528 		const char *filename;
1529 
1530 		file = vma->vm_file;
1531 		if (!file)
1532 			continue;
1533 		filename = d_path(&file->f_path, name_curpos, remaining);
1534 		if (IS_ERR(filename)) {
1535 			if (PTR_ERR(filename) == -ENAMETOOLONG) {
1536 				vfree(data);
1537 				size = size * 5 / 4;
1538 				goto alloc;
1539 			}
1540 			continue;
1541 		}
1542 
1543 		/* d_path() fills at the end, move name down */
1544 		/* n = strlen(filename) + 1: */
1545 		n = (name_curpos + remaining) - filename;
1546 		remaining = filename - name_curpos;
1547 		memmove(name_curpos, filename, n);
1548 		name_curpos += n;
1549 
1550 		*start_end_ofs++ = vma->vm_start;
1551 		*start_end_ofs++ = vma->vm_end;
1552 		*start_end_ofs++ = vma->vm_pgoff;
1553 		count++;
1554 	}
1555 
1556 	/* Now we know exact count of files, can store it */
1557 	data[0] = count;
1558 	data[1] = PAGE_SIZE;
1559 	/*
1560 	 * Count usually is less than current->mm->map_count,
1561 	 * we need to move filenames down.
1562 	 */
1563 	n = current->mm->map_count - count;
1564 	if (n != 0) {
1565 		unsigned shift_bytes = n * 3 * sizeof(data[0]);
1566 		memmove(name_base - shift_bytes, name_base,
1567 			name_curpos - name_base);
1568 		name_curpos -= shift_bytes;
1569 	}
1570 
1571 	size = name_curpos - (char *)data;
1572 	fill_note(note, "CORE", NT_FILE, size, data);
1573 	return 0;
1574 }
1575 
1576 #ifdef CORE_DUMP_USE_REGSET
1577 #include <linux/regset.h>
1578 
1579 struct elf_thread_core_info {
1580 	struct elf_thread_core_info *next;
1581 	struct task_struct *task;
1582 	struct elf_prstatus prstatus;
1583 	struct memelfnote notes[0];
1584 };
1585 
1586 struct elf_note_info {
1587 	struct elf_thread_core_info *thread;
1588 	struct memelfnote psinfo;
1589 	struct memelfnote signote;
1590 	struct memelfnote auxv;
1591 	struct memelfnote files;
1592 	user_siginfo_t csigdata;
1593 	size_t size;
1594 	int thread_notes;
1595 };
1596 
1597 /*
1598  * When a regset has a writeback hook, we call it on each thread before
1599  * dumping user memory.  On register window machines, this makes sure the
1600  * user memory backing the register data is up to date before we read it.
1601  */
1602 static void do_thread_regset_writeback(struct task_struct *task,
1603 				       const struct user_regset *regset)
1604 {
1605 	if (regset->writeback)
1606 		regset->writeback(task, regset, 1);
1607 }
1608 
1609 #ifndef PR_REG_SIZE
1610 #define PR_REG_SIZE(S) sizeof(S)
1611 #endif
1612 
1613 #ifndef PRSTATUS_SIZE
1614 #define PRSTATUS_SIZE(S) sizeof(S)
1615 #endif
1616 
1617 #ifndef PR_REG_PTR
1618 #define PR_REG_PTR(S) (&((S)->pr_reg))
1619 #endif
1620 
1621 #ifndef SET_PR_FPVALID
1622 #define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
1623 #endif
1624 
1625 static int fill_thread_core_info(struct elf_thread_core_info *t,
1626 				 const struct user_regset_view *view,
1627 				 long signr, size_t *total)
1628 {
1629 	unsigned int i;
1630 
1631 	/*
1632 	 * NT_PRSTATUS is the one special case, because the regset data
1633 	 * goes into the pr_reg field inside the note contents, rather
1634 	 * than being the whole note contents.  We fill the reset in here.
1635 	 * We assume that regset 0 is NT_PRSTATUS.
1636 	 */
1637 	fill_prstatus(&t->prstatus, t->task, signr);
1638 	(void) view->regsets[0].get(t->task, &view->regsets[0],
1639 				    0, PR_REG_SIZE(t->prstatus.pr_reg),
1640 				    PR_REG_PTR(&t->prstatus), NULL);
1641 
1642 	fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1643 		  PRSTATUS_SIZE(t->prstatus), &t->prstatus);
1644 	*total += notesize(&t->notes[0]);
1645 
1646 	do_thread_regset_writeback(t->task, &view->regsets[0]);
1647 
1648 	/*
1649 	 * Each other regset might generate a note too.  For each regset
1650 	 * that has no core_note_type or is inactive, we leave t->notes[i]
1651 	 * all zero and we'll know to skip writing it later.
1652 	 */
1653 	for (i = 1; i < view->n; ++i) {
1654 		const struct user_regset *regset = &view->regsets[i];
1655 		do_thread_regset_writeback(t->task, regset);
1656 		if (regset->core_note_type && regset->get &&
1657 		    (!regset->active || regset->active(t->task, regset))) {
1658 			int ret;
1659 			size_t size = regset->n * regset->size;
1660 			void *data = kmalloc(size, GFP_KERNEL);
1661 			if (unlikely(!data))
1662 				return 0;
1663 			ret = regset->get(t->task, regset,
1664 					  0, size, data, NULL);
1665 			if (unlikely(ret))
1666 				kfree(data);
1667 			else {
1668 				if (regset->core_note_type != NT_PRFPREG)
1669 					fill_note(&t->notes[i], "LINUX",
1670 						  regset->core_note_type,
1671 						  size, data);
1672 				else {
1673 					SET_PR_FPVALID(&t->prstatus, 1);
1674 					fill_note(&t->notes[i], "CORE",
1675 						  NT_PRFPREG, size, data);
1676 				}
1677 				*total += notesize(&t->notes[i]);
1678 			}
1679 		}
1680 	}
1681 
1682 	return 1;
1683 }
1684 
1685 static int fill_note_info(struct elfhdr *elf, int phdrs,
1686 			  struct elf_note_info *info,
1687 			  const siginfo_t *siginfo, struct pt_regs *regs)
1688 {
1689 	struct task_struct *dump_task = current;
1690 	const struct user_regset_view *view = task_user_regset_view(dump_task);
1691 	struct elf_thread_core_info *t;
1692 	struct elf_prpsinfo *psinfo;
1693 	struct core_thread *ct;
1694 	unsigned int i;
1695 
1696 	info->size = 0;
1697 	info->thread = NULL;
1698 
1699 	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1700 	if (psinfo == NULL) {
1701 		info->psinfo.data = NULL; /* So we don't free this wrongly */
1702 		return 0;
1703 	}
1704 
1705 	fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1706 
1707 	/*
1708 	 * Figure out how many notes we're going to need for each thread.
1709 	 */
1710 	info->thread_notes = 0;
1711 	for (i = 0; i < view->n; ++i)
1712 		if (view->regsets[i].core_note_type != 0)
1713 			++info->thread_notes;
1714 
1715 	/*
1716 	 * Sanity check.  We rely on regset 0 being in NT_PRSTATUS,
1717 	 * since it is our one special case.
1718 	 */
1719 	if (unlikely(info->thread_notes == 0) ||
1720 	    unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1721 		WARN_ON(1);
1722 		return 0;
1723 	}
1724 
1725 	/*
1726 	 * Initialize the ELF file header.
1727 	 */
1728 	fill_elf_header(elf, phdrs,
1729 			view->e_machine, view->e_flags);
1730 
1731 	/*
1732 	 * Allocate a structure for each thread.
1733 	 */
1734 	for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1735 		t = kzalloc(offsetof(struct elf_thread_core_info,
1736 				     notes[info->thread_notes]),
1737 			    GFP_KERNEL);
1738 		if (unlikely(!t))
1739 			return 0;
1740 
1741 		t->task = ct->task;
1742 		if (ct->task == dump_task || !info->thread) {
1743 			t->next = info->thread;
1744 			info->thread = t;
1745 		} else {
1746 			/*
1747 			 * Make sure to keep the original task at
1748 			 * the head of the list.
1749 			 */
1750 			t->next = info->thread->next;
1751 			info->thread->next = t;
1752 		}
1753 	}
1754 
1755 	/*
1756 	 * Now fill in each thread's information.
1757 	 */
1758 	for (t = info->thread; t != NULL; t = t->next)
1759 		if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1760 			return 0;
1761 
1762 	/*
1763 	 * Fill in the two process-wide notes.
1764 	 */
1765 	fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1766 	info->size += notesize(&info->psinfo);
1767 
1768 	fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1769 	info->size += notesize(&info->signote);
1770 
1771 	fill_auxv_note(&info->auxv, current->mm);
1772 	info->size += notesize(&info->auxv);
1773 
1774 	if (fill_files_note(&info->files) == 0)
1775 		info->size += notesize(&info->files);
1776 
1777 	return 1;
1778 }
1779 
1780 static size_t get_note_info_size(struct elf_note_info *info)
1781 {
1782 	return info->size;
1783 }
1784 
1785 /*
1786  * Write all the notes for each thread.  When writing the first thread, the
1787  * process-wide notes are interleaved after the first thread-specific note.
1788  */
1789 static int write_note_info(struct elf_note_info *info,
1790 			   struct coredump_params *cprm)
1791 {
1792 	bool first = true;
1793 	struct elf_thread_core_info *t = info->thread;
1794 
1795 	do {
1796 		int i;
1797 
1798 		if (!writenote(&t->notes[0], cprm))
1799 			return 0;
1800 
1801 		if (first && !writenote(&info->psinfo, cprm))
1802 			return 0;
1803 		if (first && !writenote(&info->signote, cprm))
1804 			return 0;
1805 		if (first && !writenote(&info->auxv, cprm))
1806 			return 0;
1807 		if (first && info->files.data &&
1808 				!writenote(&info->files, cprm))
1809 			return 0;
1810 
1811 		for (i = 1; i < info->thread_notes; ++i)
1812 			if (t->notes[i].data &&
1813 			    !writenote(&t->notes[i], cprm))
1814 				return 0;
1815 
1816 		first = false;
1817 		t = t->next;
1818 	} while (t);
1819 
1820 	return 1;
1821 }
1822 
1823 static void free_note_info(struct elf_note_info *info)
1824 {
1825 	struct elf_thread_core_info *threads = info->thread;
1826 	while (threads) {
1827 		unsigned int i;
1828 		struct elf_thread_core_info *t = threads;
1829 		threads = t->next;
1830 		WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1831 		for (i = 1; i < info->thread_notes; ++i)
1832 			kfree(t->notes[i].data);
1833 		kfree(t);
1834 	}
1835 	kfree(info->psinfo.data);
1836 	vfree(info->files.data);
1837 }
1838 
1839 #else
1840 
1841 /* Here is the structure in which status of each thread is captured. */
1842 struct elf_thread_status
1843 {
1844 	struct list_head list;
1845 	struct elf_prstatus prstatus;	/* NT_PRSTATUS */
1846 	elf_fpregset_t fpu;		/* NT_PRFPREG */
1847 	struct task_struct *thread;
1848 #ifdef ELF_CORE_COPY_XFPREGS
1849 	elf_fpxregset_t xfpu;		/* ELF_CORE_XFPREG_TYPE */
1850 #endif
1851 	struct memelfnote notes[3];
1852 	int num_notes;
1853 };
1854 
1855 /*
1856  * In order to add the specific thread information for the elf file format,
1857  * we need to keep a linked list of every threads pr_status and then create
1858  * a single section for them in the final core file.
1859  */
1860 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1861 {
1862 	int sz = 0;
1863 	struct task_struct *p = t->thread;
1864 	t->num_notes = 0;
1865 
1866 	fill_prstatus(&t->prstatus, p, signr);
1867 	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1868 
1869 	fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1870 		  &(t->prstatus));
1871 	t->num_notes++;
1872 	sz += notesize(&t->notes[0]);
1873 
1874 	if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1875 								&t->fpu))) {
1876 		fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1877 			  &(t->fpu));
1878 		t->num_notes++;
1879 		sz += notesize(&t->notes[1]);
1880 	}
1881 
1882 #ifdef ELF_CORE_COPY_XFPREGS
1883 	if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1884 		fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1885 			  sizeof(t->xfpu), &t->xfpu);
1886 		t->num_notes++;
1887 		sz += notesize(&t->notes[2]);
1888 	}
1889 #endif
1890 	return sz;
1891 }
1892 
1893 struct elf_note_info {
1894 	struct memelfnote *notes;
1895 	struct memelfnote *notes_files;
1896 	struct elf_prstatus *prstatus;	/* NT_PRSTATUS */
1897 	struct elf_prpsinfo *psinfo;	/* NT_PRPSINFO */
1898 	struct list_head thread_list;
1899 	elf_fpregset_t *fpu;
1900 #ifdef ELF_CORE_COPY_XFPREGS
1901 	elf_fpxregset_t *xfpu;
1902 #endif
1903 	user_siginfo_t csigdata;
1904 	int thread_status_size;
1905 	int numnote;
1906 };
1907 
1908 static int elf_note_info_init(struct elf_note_info *info)
1909 {
1910 	memset(info, 0, sizeof(*info));
1911 	INIT_LIST_HEAD(&info->thread_list);
1912 
1913 	/* Allocate space for ELF notes */
1914 	info->notes = kmalloc(8 * sizeof(struct memelfnote), GFP_KERNEL);
1915 	if (!info->notes)
1916 		return 0;
1917 	info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1918 	if (!info->psinfo)
1919 		return 0;
1920 	info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1921 	if (!info->prstatus)
1922 		return 0;
1923 	info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1924 	if (!info->fpu)
1925 		return 0;
1926 #ifdef ELF_CORE_COPY_XFPREGS
1927 	info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1928 	if (!info->xfpu)
1929 		return 0;
1930 #endif
1931 	return 1;
1932 }
1933 
1934 static int fill_note_info(struct elfhdr *elf, int phdrs,
1935 			  struct elf_note_info *info,
1936 			  const siginfo_t *siginfo, struct pt_regs *regs)
1937 {
1938 	struct list_head *t;
1939 	struct core_thread *ct;
1940 	struct elf_thread_status *ets;
1941 
1942 	if (!elf_note_info_init(info))
1943 		return 0;
1944 
1945 	for (ct = current->mm->core_state->dumper.next;
1946 					ct; ct = ct->next) {
1947 		ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1948 		if (!ets)
1949 			return 0;
1950 
1951 		ets->thread = ct->task;
1952 		list_add(&ets->list, &info->thread_list);
1953 	}
1954 
1955 	list_for_each(t, &info->thread_list) {
1956 		int sz;
1957 
1958 		ets = list_entry(t, struct elf_thread_status, list);
1959 		sz = elf_dump_thread_status(siginfo->si_signo, ets);
1960 		info->thread_status_size += sz;
1961 	}
1962 	/* now collect the dump for the current */
1963 	memset(info->prstatus, 0, sizeof(*info->prstatus));
1964 	fill_prstatus(info->prstatus, current, siginfo->si_signo);
1965 	elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1966 
1967 	/* Set up header */
1968 	fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
1969 
1970 	/*
1971 	 * Set up the notes in similar form to SVR4 core dumps made
1972 	 * with info from their /proc.
1973 	 */
1974 
1975 	fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1976 		  sizeof(*info->prstatus), info->prstatus);
1977 	fill_psinfo(info->psinfo, current->group_leader, current->mm);
1978 	fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1979 		  sizeof(*info->psinfo), info->psinfo);
1980 
1981 	fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
1982 	fill_auxv_note(info->notes + 3, current->mm);
1983 	info->numnote = 4;
1984 
1985 	if (fill_files_note(info->notes + info->numnote) == 0) {
1986 		info->notes_files = info->notes + info->numnote;
1987 		info->numnote++;
1988 	}
1989 
1990 	/* Try to dump the FPU. */
1991 	info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1992 							       info->fpu);
1993 	if (info->prstatus->pr_fpvalid)
1994 		fill_note(info->notes + info->numnote++,
1995 			  "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1996 #ifdef ELF_CORE_COPY_XFPREGS
1997 	if (elf_core_copy_task_xfpregs(current, info->xfpu))
1998 		fill_note(info->notes + info->numnote++,
1999 			  "LINUX", ELF_CORE_XFPREG_TYPE,
2000 			  sizeof(*info->xfpu), info->xfpu);
2001 #endif
2002 
2003 	return 1;
2004 }
2005 
2006 static size_t get_note_info_size(struct elf_note_info *info)
2007 {
2008 	int sz = 0;
2009 	int i;
2010 
2011 	for (i = 0; i < info->numnote; i++)
2012 		sz += notesize(info->notes + i);
2013 
2014 	sz += info->thread_status_size;
2015 
2016 	return sz;
2017 }
2018 
2019 static int write_note_info(struct elf_note_info *info,
2020 			   struct coredump_params *cprm)
2021 {
2022 	int i;
2023 	struct list_head *t;
2024 
2025 	for (i = 0; i < info->numnote; i++)
2026 		if (!writenote(info->notes + i, cprm))
2027 			return 0;
2028 
2029 	/* write out the thread status notes section */
2030 	list_for_each(t, &info->thread_list) {
2031 		struct elf_thread_status *tmp =
2032 				list_entry(t, struct elf_thread_status, list);
2033 
2034 		for (i = 0; i < tmp->num_notes; i++)
2035 			if (!writenote(&tmp->notes[i], cprm))
2036 				return 0;
2037 	}
2038 
2039 	return 1;
2040 }
2041 
2042 static void free_note_info(struct elf_note_info *info)
2043 {
2044 	while (!list_empty(&info->thread_list)) {
2045 		struct list_head *tmp = info->thread_list.next;
2046 		list_del(tmp);
2047 		kfree(list_entry(tmp, struct elf_thread_status, list));
2048 	}
2049 
2050 	/* Free data possibly allocated by fill_files_note(): */
2051 	if (info->notes_files)
2052 		vfree(info->notes_files->data);
2053 
2054 	kfree(info->prstatus);
2055 	kfree(info->psinfo);
2056 	kfree(info->notes);
2057 	kfree(info->fpu);
2058 #ifdef ELF_CORE_COPY_XFPREGS
2059 	kfree(info->xfpu);
2060 #endif
2061 }
2062 
2063 #endif
2064 
2065 static struct vm_area_struct *first_vma(struct task_struct *tsk,
2066 					struct vm_area_struct *gate_vma)
2067 {
2068 	struct vm_area_struct *ret = tsk->mm->mmap;
2069 
2070 	if (ret)
2071 		return ret;
2072 	return gate_vma;
2073 }
2074 /*
2075  * Helper function for iterating across a vma list.  It ensures that the caller
2076  * will visit `gate_vma' prior to terminating the search.
2077  */
2078 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
2079 					struct vm_area_struct *gate_vma)
2080 {
2081 	struct vm_area_struct *ret;
2082 
2083 	ret = this_vma->vm_next;
2084 	if (ret)
2085 		return ret;
2086 	if (this_vma == gate_vma)
2087 		return NULL;
2088 	return gate_vma;
2089 }
2090 
2091 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
2092 			     elf_addr_t e_shoff, int segs)
2093 {
2094 	elf->e_shoff = e_shoff;
2095 	elf->e_shentsize = sizeof(*shdr4extnum);
2096 	elf->e_shnum = 1;
2097 	elf->e_shstrndx = SHN_UNDEF;
2098 
2099 	memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2100 
2101 	shdr4extnum->sh_type = SHT_NULL;
2102 	shdr4extnum->sh_size = elf->e_shnum;
2103 	shdr4extnum->sh_link = elf->e_shstrndx;
2104 	shdr4extnum->sh_info = segs;
2105 }
2106 
2107 /*
2108  * Actual dumper
2109  *
2110  * This is a two-pass process; first we find the offsets of the bits,
2111  * and then they are actually written out.  If we run out of core limit
2112  * we just truncate.
2113  */
2114 static int elf_core_dump(struct coredump_params *cprm)
2115 {
2116 	int has_dumped = 0;
2117 	mm_segment_t fs;
2118 	int segs, i;
2119 	size_t vma_data_size = 0;
2120 	struct vm_area_struct *vma, *gate_vma;
2121 	struct elfhdr *elf = NULL;
2122 	loff_t offset = 0, dataoff;
2123 	struct elf_note_info info = { };
2124 	struct elf_phdr *phdr4note = NULL;
2125 	struct elf_shdr *shdr4extnum = NULL;
2126 	Elf_Half e_phnum;
2127 	elf_addr_t e_shoff;
2128 	elf_addr_t *vma_filesz = NULL;
2129 
2130 	/*
2131 	 * We no longer stop all VM operations.
2132 	 *
2133 	 * This is because those proceses that could possibly change map_count
2134 	 * or the mmap / vma pages are now blocked in do_exit on current
2135 	 * finishing this core dump.
2136 	 *
2137 	 * Only ptrace can touch these memory addresses, but it doesn't change
2138 	 * the map_count or the pages allocated. So no possibility of crashing
2139 	 * exists while dumping the mm->vm_next areas to the core file.
2140 	 */
2141 
2142 	/* alloc memory for large data structures: too large to be on stack */
2143 	elf = kmalloc(sizeof(*elf), GFP_KERNEL);
2144 	if (!elf)
2145 		goto out;
2146 	/*
2147 	 * The number of segs are recored into ELF header as 16bit value.
2148 	 * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
2149 	 */
2150 	segs = current->mm->map_count;
2151 	segs += elf_core_extra_phdrs();
2152 
2153 	gate_vma = get_gate_vma(current->mm);
2154 	if (gate_vma != NULL)
2155 		segs++;
2156 
2157 	/* for notes section */
2158 	segs++;
2159 
2160 	/* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
2161 	 * this, kernel supports extended numbering. Have a look at
2162 	 * include/linux/elf.h for further information. */
2163 	e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2164 
2165 	/*
2166 	 * Collect all the non-memory information about the process for the
2167 	 * notes.  This also sets up the file header.
2168 	 */
2169 	if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2170 		goto cleanup;
2171 
2172 	has_dumped = 1;
2173 
2174 	fs = get_fs();
2175 	set_fs(KERNEL_DS);
2176 
2177 	offset += sizeof(*elf);				/* Elf header */
2178 	offset += segs * sizeof(struct elf_phdr);	/* Program headers */
2179 
2180 	/* Write notes phdr entry */
2181 	{
2182 		size_t sz = get_note_info_size(&info);
2183 
2184 		sz += elf_coredump_extra_notes_size();
2185 
2186 		phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2187 		if (!phdr4note)
2188 			goto end_coredump;
2189 
2190 		fill_elf_note_phdr(phdr4note, sz, offset);
2191 		offset += sz;
2192 	}
2193 
2194 	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2195 
2196 	vma_filesz = kmalloc_array(segs - 1, sizeof(*vma_filesz), GFP_KERNEL);
2197 	if (!vma_filesz)
2198 		goto end_coredump;
2199 
2200 	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2201 			vma = next_vma(vma, gate_vma)) {
2202 		unsigned long dump_size;
2203 
2204 		dump_size = vma_dump_size(vma, cprm->mm_flags);
2205 		vma_filesz[i++] = dump_size;
2206 		vma_data_size += dump_size;
2207 	}
2208 
2209 	offset += vma_data_size;
2210 	offset += elf_core_extra_data_size();
2211 	e_shoff = offset;
2212 
2213 	if (e_phnum == PN_XNUM) {
2214 		shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2215 		if (!shdr4extnum)
2216 			goto end_coredump;
2217 		fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
2218 	}
2219 
2220 	offset = dataoff;
2221 
2222 	if (!dump_emit(cprm, elf, sizeof(*elf)))
2223 		goto end_coredump;
2224 
2225 	if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
2226 		goto end_coredump;
2227 
2228 	/* Write program headers for segments dump */
2229 	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2230 			vma = next_vma(vma, gate_vma)) {
2231 		struct elf_phdr phdr;
2232 
2233 		phdr.p_type = PT_LOAD;
2234 		phdr.p_offset = offset;
2235 		phdr.p_vaddr = vma->vm_start;
2236 		phdr.p_paddr = 0;
2237 		phdr.p_filesz = vma_filesz[i++];
2238 		phdr.p_memsz = vma->vm_end - vma->vm_start;
2239 		offset += phdr.p_filesz;
2240 		phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2241 		if (vma->vm_flags & VM_WRITE)
2242 			phdr.p_flags |= PF_W;
2243 		if (vma->vm_flags & VM_EXEC)
2244 			phdr.p_flags |= PF_X;
2245 		phdr.p_align = ELF_EXEC_PAGESIZE;
2246 
2247 		if (!dump_emit(cprm, &phdr, sizeof(phdr)))
2248 			goto end_coredump;
2249 	}
2250 
2251 	if (!elf_core_write_extra_phdrs(cprm, offset))
2252 		goto end_coredump;
2253 
2254  	/* write out the notes section */
2255 	if (!write_note_info(&info, cprm))
2256 		goto end_coredump;
2257 
2258 	if (elf_coredump_extra_notes_write(cprm))
2259 		goto end_coredump;
2260 
2261 	/* Align to page */
2262 	if (!dump_skip(cprm, dataoff - cprm->written))
2263 		goto end_coredump;
2264 
2265 	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2266 			vma = next_vma(vma, gate_vma)) {
2267 		unsigned long addr;
2268 		unsigned long end;
2269 
2270 		end = vma->vm_start + vma_filesz[i++];
2271 
2272 		for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2273 			struct page *page;
2274 			int stop;
2275 
2276 			page = get_dump_page(addr);
2277 			if (page) {
2278 				void *kaddr = kmap(page);
2279 				stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
2280 				kunmap(page);
2281 				page_cache_release(page);
2282 			} else
2283 				stop = !dump_skip(cprm, PAGE_SIZE);
2284 			if (stop)
2285 				goto end_coredump;
2286 		}
2287 	}
2288 
2289 	if (!elf_core_write_extra_data(cprm))
2290 		goto end_coredump;
2291 
2292 	if (e_phnum == PN_XNUM) {
2293 		if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
2294 			goto end_coredump;
2295 	}
2296 
2297 end_coredump:
2298 	set_fs(fs);
2299 
2300 cleanup:
2301 	free_note_info(&info);
2302 	kfree(shdr4extnum);
2303 	kfree(vma_filesz);
2304 	kfree(phdr4note);
2305 	kfree(elf);
2306 out:
2307 	return has_dumped;
2308 }
2309 
2310 #endif		/* CONFIG_ELF_CORE */
2311 
2312 static int __init init_elf_binfmt(void)
2313 {
2314 	register_binfmt(&elf_format);
2315 	return 0;
2316 }
2317 
2318 static void __exit exit_elf_binfmt(void)
2319 {
2320 	/* Remove the COFF and ELF loaders. */
2321 	unregister_binfmt(&elf_format);
2322 }
2323 
2324 core_initcall(init_elf_binfmt);
2325 module_exit(exit_elf_binfmt);
2326 MODULE_LICENSE("GPL");
2327