1 /*
2  * linux/fs/binfmt_elf.c
3  *
4  * These are the functions used to load ELF format executables as used
5  * on SVr4 machines.  Information on the format may be found in the book
6  * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
7  * Tools".
8  *
9  * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
10  */
11 
12 #include <linux/module.h>
13 #include <linux/kernel.h>
14 #include <linux/fs.h>
15 #include <linux/stat.h>
16 #include <linux/time.h>
17 #include <linux/mm.h>
18 #include <linux/mman.h>
19 #include <linux/errno.h>
20 #include <linux/signal.h>
21 #include <linux/binfmts.h>
22 #include <linux/string.h>
23 #include <linux/file.h>
24 #include <linux/fcntl.h>
25 #include <linux/ptrace.h>
26 #include <linux/slab.h>
27 #include <linux/shm.h>
28 #include <linux/personality.h>
29 #include <linux/elfcore.h>
30 #include <linux/init.h>
31 #include <linux/highuid.h>
32 #include <linux/smp.h>
33 #include <linux/compiler.h>
34 #include <linux/highmem.h>
35 #include <linux/pagemap.h>
36 #include <linux/security.h>
37 #include <linux/syscalls.h>
38 #include <linux/random.h>
39 #include <linux/elf.h>
40 #include <linux/utsname.h>
41 #include <asm/uaccess.h>
42 #include <asm/param.h>
43 #include <asm/page.h>
44 
45 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
46 static int load_elf_library(struct file *);
47 static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
48 				int, int, unsigned long);
49 
50 /*
51  * If we don't support core dumping, then supply a NULL so we
52  * don't even try.
53  */
54 #if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
55 static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
56 #else
57 #define elf_core_dump	NULL
58 #endif
59 
60 #if ELF_EXEC_PAGESIZE > PAGE_SIZE
61 #define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
62 #else
63 #define ELF_MIN_ALIGN	PAGE_SIZE
64 #endif
65 
66 #ifndef ELF_CORE_EFLAGS
67 #define ELF_CORE_EFLAGS	0
68 #endif
69 
70 #define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
71 #define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
72 #define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
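/*
 * For illustration (assuming ELF_MIN_ALIGN == 4096, i.e. 0x1000):
 *   ELF_PAGESTART(0x12345)  == 0x12000  (round down to page start)
 *   ELF_PAGEOFFSET(0x12345) == 0x345    (offset within the page)
 *   ELF_PAGEALIGN(0x12345)  == 0x13000  (round up to the next boundary)
 */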
73 
74 static struct linux_binfmt elf_format = {
75 		.module		= THIS_MODULE,
76 		.load_binary	= load_elf_binary,
77 		.load_shlib	= load_elf_library,
78 		.core_dump	= elf_core_dump,
79 		.min_coredump	= ELF_EXEC_PAGESIZE,
80 		.hasvdso	= 1
81 };
82 
83 #define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
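/*
 * BAD_ADDR doubles as an error check: do_mmap()/do_brk() return small
 * negative errno values on failure, which as unsigned longs are >=
 * TASK_SIZE, so one comparison catches both errors and out-of-range
 * mappings.
 */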
84 
85 static int set_brk(unsigned long start, unsigned long end)
86 {
87 	start = ELF_PAGEALIGN(start);
88 	end = ELF_PAGEALIGN(end);
89 	if (end > start) {
90 		unsigned long addr;
91 		down_write(&current->mm->mmap_sem);
92 		addr = do_brk(start, end - start);
93 		up_write(&current->mm->mmap_sem);
94 		if (BAD_ADDR(addr))
95 			return addr;
96 	}
97 	current->mm->start_brk = current->mm->brk = end;
98 	return 0;
99 }
100 
101 /* We need to explicitly zero any fractional pages
102    after the data section (i.e. bss).  These would
103    otherwise contain junk from the file that should
104    not be in memory.
105  */
106 static int padzero(unsigned long elf_bss)
107 {
108 	unsigned long nbyte;
109 
110 	nbyte = ELF_PAGEOFFSET(elf_bss);
111 	if (nbyte) {
112 		nbyte = ELF_MIN_ALIGN - nbyte;
113 		if (clear_user((void __user *) elf_bss, nbyte))
114 			return -EFAULT;
115 	}
116 	return 0;
117 }
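/*
 * Example (illustrative figures, assuming ELF_MIN_ALIGN == 4096): for
 * elf_bss == 0x0804a123, ELF_PAGEOFFSET() yields 0x123, so the
 * remaining 0xedd bytes up to 0x0804b000 are cleared in user space.
 */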
118 
119 /* Let's use some macros to make this stack manipulation a little clearer */
120 #ifdef CONFIG_STACK_GROWSUP
121 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
122 #define STACK_ROUND(sp, items) \
123 	((15 + (unsigned long) ((sp) + (items))) & ~15UL)
124 #define STACK_ALLOC(sp, len) ({ \
125 	elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
126 	old_sp; })
127 #else
128 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
129 #define STACK_ROUND(sp, items) \
130 	(((unsigned long) (sp - items)) & ~15UL)
131 #define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
132 #endif
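/*
 * On a downward-growing stack, STACK_ALLOC() reserves len bytes and
 * returns the new (lower) sp, STACK_ADD() makes room for a number of
 * elf_addr_t items, and STACK_ROUND() keeps sp 16-byte aligned as most
 * ABIs require.
 */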
133 
134 static int
135 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
136 		unsigned long load_addr, unsigned long interp_load_addr)
137 {
138 	unsigned long p = bprm->p;
139 	int argc = bprm->argc;
140 	int envc = bprm->envc;
141 	elf_addr_t __user *argv;
142 	elf_addr_t __user *envp;
143 	elf_addr_t __user *sp;
144 	elf_addr_t __user *u_platform;
145 	const char *k_platform = ELF_PLATFORM;
146 	int items;
147 	elf_addr_t *elf_info;
148 	int ei_index = 0;
149 	struct task_struct *tsk = current;
150 	struct vm_area_struct *vma;
151 
152 	/*
153 	 * In some cases (e.g. Hyper-Threading), we want to avoid L1
154 	 * evictions by the processes running on the same package. One
155 	 * thing we can do is to shuffle the initial stack for them.
156 	 */
157 
158 	p = arch_align_stack(p);
159 
160 	/*
161 	 * If this architecture has a platform capability string, copy it
162 	 * to userspace.  In some cases (Sparc), this info is impossible
163 	 * for userspace to get any other way, in others (i386) it is
164 	 * merely difficult.
165 	 */
166 	u_platform = NULL;
167 	if (k_platform) {
168 		size_t len = strlen(k_platform) + 1;
169 
170 		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
171 		if (__copy_to_user(u_platform, k_platform, len))
172 			return -EFAULT;
173 	}
174 
175 	/* Create the ELF interpreter info */
176 	elf_info = (elf_addr_t *)current->mm->saved_auxv;
177 	/* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
178 #define NEW_AUX_ENT(id, val) \
179 	do { \
180 		elf_info[ei_index++] = id; \
181 		elf_info[ei_index++] = val; \
182 	} while (0)
183 
184 #ifdef ARCH_DLINFO
185 	/*
186 	 * ARCH_DLINFO must come first so PPC can do its special alignment of
187 	 * AUXV.
188 	 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
189 	 * ARCH_DLINFO changes
190 	 */
191 	ARCH_DLINFO;
192 #endif
193 	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
194 	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
195 	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
196 	NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
197 	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
198 	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
199 	NEW_AUX_ENT(AT_BASE, interp_load_addr);
200 	NEW_AUX_ENT(AT_FLAGS, 0);
201 	NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
202 	NEW_AUX_ENT(AT_UID, tsk->uid);
203 	NEW_AUX_ENT(AT_EUID, tsk->euid);
204 	NEW_AUX_ENT(AT_GID, tsk->gid);
205 	NEW_AUX_ENT(AT_EGID, tsk->egid);
206 	NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
207 	if (k_platform) {
208 		NEW_AUX_ENT(AT_PLATFORM,
209 			    (elf_addr_t)(unsigned long)u_platform);
210 	}
211 	if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
212 		NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
213 	}
214 #undef NEW_AUX_ENT
215 	/* AT_NULL is zero; clear the rest too */
216 	memset(&elf_info[ei_index], 0,
217 	       sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
218 
219 	/* And advance past the AT_NULL entry.  */
220 	ei_index += 2;
221 
222 	sp = STACK_ADD(p, ei_index);
223 
224 	items = (argc + 1) + (envc + 1) + 1;
225 	bprm->p = STACK_ROUND(sp, items);
226 
227 	/* Point sp at the lowest address on the stack */
228 #ifdef CONFIG_STACK_GROWSUP
229 	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
230 	bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
231 #else
232 	sp = (elf_addr_t __user *)bprm->p;
233 #endif
234 
235 
236 	/*
237 	 * Grow the stack manually; some architectures have a limit on how
238 	 * far ahead a user-space access may be in order to grow the stack.
239 	 */
240 	vma = find_extend_vma(current->mm, bprm->p);
241 	if (!vma)
242 		return -EFAULT;
243 
244 	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
245 	if (__put_user(argc, sp++))
246 		return -EFAULT;
247 	argv = sp;
248 	envp = argv + argc + 1;
249 
250 	/* Populate argv and envp */
251 	p = current->mm->arg_end = current->mm->arg_start;
252 	while (argc-- > 0) {
253 		size_t len;
254 		if (__put_user((elf_addr_t)p, argv++))
255 			return -EFAULT;
256 		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
257 		if (!len || len > MAX_ARG_STRLEN)
258 			return -EINVAL;
259 		p += len;
260 	}
261 	if (__put_user(0, argv))
262 		return -EFAULT;
263 	current->mm->arg_end = current->mm->env_start = p;
264 	while (envc-- > 0) {
265 		size_t len;
266 		if (__put_user((elf_addr_t)p, envp++))
267 			return -EFAULT;
268 		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
269 		if (!len || len > MAX_ARG_STRLEN)
270 			return -EINVAL;
271 		p += len;
272 	}
273 	if (__put_user(0, envp))
274 		return -EFAULT;
275 	current->mm->env_end = p;
276 
277 	/* Put the elf_info on the stack in the right place.  */
278 	sp = (elf_addr_t __user *)envp + 1;
279 	if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
280 		return -EFAULT;
281 	return 0;
282 }
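/*
 * The resulting initial stack, from low to high addresses (the
 * standard System V ABI process-startup layout):
 *
 *	argc
 *	argv[0] ... argv[argc-1], NULL
 *	envp[0] ... envp[n-1], NULL
 *	auxv[] (id/value pairs, terminated by AT_NULL)
 *	argument and environment strings, platform string
 */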
283 
284 #ifndef elf_map
285 
286 static unsigned long elf_map(struct file *filep, unsigned long addr,
287 		struct elf_phdr *eppnt, int prot, int type,
288 		unsigned long total_size)
289 {
290 	unsigned long map_addr;
291 	unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
292 	unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
293 	addr = ELF_PAGESTART(addr);
294 	size = ELF_PAGEALIGN(size);
295 
296 	/* mmap() will return -EINVAL if given a zero size, but a
297 	 * segment with zero filesize is perfectly valid */
298 	if (!size)
299 		return addr;
300 
301 	down_write(&current->mm->mmap_sem);
302 	/*
303 	* total_size is the size of the ELF (interpreter) image.
304 	* The _first_ mmap needs to know the full size, otherwise
305 	* randomization might put this image into an overlapping
306 	* position with the ELF binary image. (since size < total_size)
307 	* So we first map the 'big' image - and unmap the remainder at
308 	* the end. (which unmap is needed for ELF images with holes.)
309 	*/
310 	if (total_size) {
311 		total_size = ELF_PAGEALIGN(total_size);
312 		map_addr = do_mmap(filep, addr, total_size, prot, type, off);
313 		if (!BAD_ADDR(map_addr))
314 			do_munmap(current->mm, map_addr+size, total_size-size);
315 	} else
316 		map_addr = do_mmap(filep, addr, size, prot, type, off);
317 
318 	up_write(&current->mm->mmap_sem);
319 	return map_addr;
320 }
321 
322 #endif /* !elf_map */
323 
324 static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
325 {
326 	int i, first_idx = -1, last_idx = -1;
327 
328 	for (i = 0; i < nr; i++) {
329 		if (cmds[i].p_type == PT_LOAD) {
330 			last_idx = i;
331 			if (first_idx == -1)
332 				first_idx = i;
333 		}
334 	}
335 	if (first_idx == -1)
336 		return 0;
337 
338 	return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
339 				ELF_PAGESTART(cmds[first_idx].p_vaddr);
340 }
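/*
 * Example (illustrative numbers): with a first PT_LOAD at p_vaddr
 * 0x400000 and a last one at p_vaddr 0x600000 with p_memsz 0x800, the
 * image spans 0x600800 - 0x400000 = 0x200800 bytes of address space,
 * holes included.
 */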
341 
342 
343 /* This is much more generalized than the library routine read function,
344    so we keep this separate.  Technically the library read function
345    is only provided so that we can read a.out libraries that have
346    an ELF header */
347 
348 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
349 		struct file *interpreter, unsigned long *interp_map_addr,
350 		unsigned long no_base)
351 {
352 	struct elf_phdr *elf_phdata;
353 	struct elf_phdr *eppnt;
354 	unsigned long load_addr = 0;
355 	int load_addr_set = 0;
356 	unsigned long last_bss = 0, elf_bss = 0;
357 	unsigned long error = ~0UL;
358 	unsigned long total_size;
359 	int retval, i, size;
360 
361 	/* First of all, some simple consistency checks */
362 	if (interp_elf_ex->e_type != ET_EXEC &&
363 	    interp_elf_ex->e_type != ET_DYN)
364 		goto out;
365 	if (!elf_check_arch(interp_elf_ex))
366 		goto out;
367 	if (!interpreter->f_op || !interpreter->f_op->mmap)
368 		goto out;
369 
370 	/*
371 	 * If the size of this structure has changed, then punt, since
372 	 * we will be doing the wrong thing.
373 	 */
374 	if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
375 		goto out;
376 	if (interp_elf_ex->e_phnum < 1 ||
377 		interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
378 		goto out;
379 
380 	/* Now read in all of the header information */
381 	size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
382 	if (size > ELF_MIN_ALIGN)
383 		goto out;
384 	elf_phdata = kmalloc(size, GFP_KERNEL);
385 	if (!elf_phdata)
386 		goto out;
387 
388 	retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
389 			     (char *)elf_phdata,size);
390 	error = -EIO;
391 	if (retval != size) {
392 		if (retval < 0)
393 			error = retval;
394 		goto out_close;
395 	}
396 
397 	total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
398 	if (!total_size) {
399 		error = -EINVAL;
400 		goto out_close;
401 	}
402 
403 	eppnt = elf_phdata;
404 	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
405 		if (eppnt->p_type == PT_LOAD) {
406 			int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
407 			int elf_prot = 0;
408 			unsigned long vaddr = 0;
409 			unsigned long k, map_addr;
410 
411 			if (eppnt->p_flags & PF_R)
412 				elf_prot = PROT_READ;
413 			if (eppnt->p_flags & PF_W)
414 				elf_prot |= PROT_WRITE;
415 			if (eppnt->p_flags & PF_X)
416 				elf_prot |= PROT_EXEC;
417 			vaddr = eppnt->p_vaddr;
418 			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
419 				elf_type |= MAP_FIXED;
420 			else if (no_base && interp_elf_ex->e_type == ET_DYN)
421 				load_addr = -vaddr;
422 
423 			map_addr = elf_map(interpreter, load_addr + vaddr,
424 					eppnt, elf_prot, elf_type, total_size);
425 			total_size = 0;
426 			if (!*interp_map_addr)
427 				*interp_map_addr = map_addr;
428 			error = map_addr;
429 			if (BAD_ADDR(map_addr))
430 				goto out_close;
431 
432 			if (!load_addr_set &&
433 			    interp_elf_ex->e_type == ET_DYN) {
434 				load_addr = map_addr - ELF_PAGESTART(vaddr);
435 				load_addr_set = 1;
436 			}
437 
438 			/*
439 			 * Check to see if the section's size will overflow the
440 			 * allowed task size. Note that p_filesz must always be
441 			 * <= p_memsz so it's only necessary to check p_memsz.
442 			 */
443 			k = load_addr + eppnt->p_vaddr;
444 			if (BAD_ADDR(k) ||
445 			    eppnt->p_filesz > eppnt->p_memsz ||
446 			    eppnt->p_memsz > TASK_SIZE ||
447 			    TASK_SIZE - eppnt->p_memsz < k) {
448 				error = -ENOMEM;
449 				goto out_close;
450 			}
451 
452 			/*
453 			 * Find the end of the file mapping for this phdr, and
454 			 * keep track of the largest address we see for this.
455 			 */
456 			k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
457 			if (k > elf_bss)
458 				elf_bss = k;
459 
460 			/*
461 			 * Do the same thing for the memory mapping - between
462 			 * elf_bss and last_bss is the bss section.
463 			 */
464 			k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
465 			if (k > last_bss)
466 				last_bss = k;
467 		}
468 	}
469 
470 	/*
471 	 * Now fill out the bss section.  First pad the last page up
472 	 * to the page boundary, and then perform a mmap to make sure
473 	 * that there are zero-mapped pages up to and including the
474 	 * last bss page.
475 	 */
476 	if (padzero(elf_bss)) {
477 		error = -EFAULT;
478 		goto out_close;
479 	}
480 
481 	/* What we have mapped so far */
482 	elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
483 
484 	/* Map the last of the bss segment */
485 	if (last_bss > elf_bss) {
486 		down_write(&current->mm->mmap_sem);
487 		error = do_brk(elf_bss, last_bss - elf_bss);
488 		up_write(&current->mm->mmap_sem);
489 		if (BAD_ADDR(error))
490 			goto out_close;
491 	}
492 
493 	error = load_addr;
494 
495 out_close:
496 	kfree(elf_phdata);
497 out:
498 	return error;
499 }
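/*
 * On success load_elf_interp() returns the interpreter's relocation
 * adjustment (zero for an ET_EXEC interpreter); the caller adds the
 * interpreter's e_entry to obtain the address control transfers to.
 */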
500 
501 /*
502  * These are the functions used to load ELF style executables and shared
503  * libraries.  There is no binary dependent code anywhere else.
504  */
505 
506 #define INTERPRETER_NONE 0
507 #define INTERPRETER_ELF 2
508 
509 #ifndef STACK_RND_MASK
510 #define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))	/* 8MB of VA */
511 #endif
512 
513 static unsigned long randomize_stack_top(unsigned long stack_top)
514 {
515 	unsigned long random_variable = 0;	/* long: mask << PAGE_SHIFT can exceed 32 bits */
516 
517 	if ((current->flags & PF_RANDOMIZE) &&
518 		!(current->personality & ADDR_NO_RANDOMIZE)) {
519 		random_variable = get_random_int() & STACK_RND_MASK;
520 		random_variable <<= PAGE_SHIFT;
521 	}
522 #ifdef CONFIG_STACK_GROWSUP
523 	return PAGE_ALIGN(stack_top) + random_variable;
524 #else
525 	return PAGE_ALIGN(stack_top) - random_variable;
526 #endif
527 }
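/*
 * With the fallback STACK_RND_MASK of 0x7ff and 4K pages this shifts
 * the stack top down (or up, on grows-up stacks) by up to
 * 0x7ff << 12 bytes, i.e. just under 8MB.
 */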
528 
529 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
530 {
531 	struct file *interpreter = NULL; /* to shut gcc up */
532 	unsigned long load_addr = 0, load_bias = 0;
533 	int load_addr_set = 0;
534 	char * elf_interpreter = NULL;
535 	unsigned long error;
536 	struct elf_phdr *elf_ppnt, *elf_phdata;
537 	unsigned long elf_bss, elf_brk;
538 	int elf_exec_fileno;
539 	int retval, i;
540 	unsigned int size;
541 	unsigned long elf_entry;
542 	unsigned long interp_load_addr = 0;
543 	unsigned long start_code, end_code, start_data, end_data;
544 	unsigned long reloc_func_desc = 0;
545 	int executable_stack = EXSTACK_DEFAULT;
546 	unsigned long def_flags = 0;
547 	struct {
548 		struct elfhdr elf_ex;
549 		struct elfhdr interp_elf_ex;
550 	} *loc;
551 
552 	loc = kmalloc(sizeof(*loc), GFP_KERNEL);
553 	if (!loc) {
554 		retval = -ENOMEM;
555 		goto out_ret;
556 	}
557 
558 	/* Get the exec-header */
559 	loc->elf_ex = *((struct elfhdr *)bprm->buf);
560 
561 	retval = -ENOEXEC;
562 	/* First of all, some simple consistency checks */
563 	if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
564 		goto out;
565 
566 	if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
567 		goto out;
568 	if (!elf_check_arch(&loc->elf_ex))
569 		goto out;
570 	if (!bprm->file->f_op || !bprm->file->f_op->mmap)
571 		goto out;
572 
573 	/* Now read in all of the header information */
574 	if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
575 		goto out;
576 	if (loc->elf_ex.e_phnum < 1 ||
577 	 	loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
578 		goto out;
579 	size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
580 	retval = -ENOMEM;
581 	elf_phdata = kmalloc(size, GFP_KERNEL);
582 	if (!elf_phdata)
583 		goto out;
584 
585 	retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
586 			     (char *)elf_phdata, size);
587 	if (retval != size) {
588 		if (retval >= 0)
589 			retval = -EIO;
590 		goto out_free_ph;
591 	}
592 
593 	retval = get_unused_fd();
594 	if (retval < 0)
595 		goto out_free_ph;
596 	get_file(bprm->file);
597 	fd_install(elf_exec_fileno = retval, bprm->file);
598 
599 	elf_ppnt = elf_phdata;
600 	elf_bss = 0;
601 	elf_brk = 0;
602 
603 	start_code = ~0UL;
604 	end_code = 0;
605 	start_data = 0;
606 	end_data = 0;
607 
608 	for (i = 0; i < loc->elf_ex.e_phnum; i++) {
609 		if (elf_ppnt->p_type == PT_INTERP) {
610 			/* This names the program interpreter (the
611 			 * dynamic linker) that will be loaded to
612 			 * resolve this binary's shared libraries.
613 			 */
614 			retval = -ENOEXEC;
615 			if (elf_ppnt->p_filesz > PATH_MAX ||
616 			    elf_ppnt->p_filesz < 2)
617 				goto out_free_file;
618 
619 			retval = -ENOMEM;
620 			elf_interpreter = kmalloc(elf_ppnt->p_filesz,
621 						  GFP_KERNEL);
622 			if (!elf_interpreter)
623 				goto out_free_file;
624 
625 			retval = kernel_read(bprm->file, elf_ppnt->p_offset,
626 					     elf_interpreter,
627 					     elf_ppnt->p_filesz);
628 			if (retval != elf_ppnt->p_filesz) {
629 				if (retval >= 0)
630 					retval = -EIO;
631 				goto out_free_interp;
632 			}
633 			/* make sure path is NULL terminated */
634 			retval = -ENOEXEC;
635 			if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
636 				goto out_free_interp;
637 
638 			/*
639 			 * The early SET_PERSONALITY here is so that the lookup
640 			 * for the interpreter happens in the namespace of the
641 			 * to-be-execed image.  SET_PERSONALITY can select an
642 			 * alternate root.
643 			 *
644 			 * However, SET_PERSONALITY is NOT allowed to switch
645 			 * this task into the new image's memory mapping
646 			 * policy - that is, TASK_SIZE must still evaluate to
647 			 * that which is appropriate to the execing application.
648 			 * This is because exit_mmap() needs to have TASK_SIZE
649 			 * evaluate to the size of the old image.
650 			 *
651 			 * So if (say) a 64-bit application is execing a 32-bit
652 			 * application it is the architecture's responsibility
653 			 * to defer changing the value of TASK_SIZE until the
654 			 * switch really is going to happen - do this in
655 			 * flush_thread().	- akpm
656 			 */
657 			SET_PERSONALITY(loc->elf_ex, 0);
658 
659 			interpreter = open_exec(elf_interpreter);
660 			retval = PTR_ERR(interpreter);
661 			if (IS_ERR(interpreter))
662 				goto out_free_interp;
663 
664 			/*
665 			 * If the binary is not readable then enforce
666 			 * mm->dumpable = 0 regardless of the interpreter's
667 			 * permissions.
668 			 */
669 			if (file_permission(interpreter, MAY_READ) < 0)
670 				bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
671 
672 			retval = kernel_read(interpreter, 0, bprm->buf,
673 					     BINPRM_BUF_SIZE);
674 			if (retval != BINPRM_BUF_SIZE) {
675 				if (retval >= 0)
676 					retval = -EIO;
677 				goto out_free_dentry;
678 			}
679 
680 			/* Get the exec headers */
681 			loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
682 			break;
683 		}
684 		elf_ppnt++;
685 	}
686 
687 	elf_ppnt = elf_phdata;
688 	for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
689 		if (elf_ppnt->p_type == PT_GNU_STACK) {
690 			if (elf_ppnt->p_flags & PF_X)
691 				executable_stack = EXSTACK_ENABLE_X;
692 			else
693 				executable_stack = EXSTACK_DISABLE_X;
694 			break;
695 		}
696 
697 	/* Some simple consistency checks for the interpreter */
698 	if (elf_interpreter) {
699 		retval = -ELIBBAD;
700 		/* Not an ELF interpreter */
701 		if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
702 			goto out_free_dentry;
703 		/* Verify the interpreter has a valid arch */
704 		if (!elf_check_arch(&loc->interp_elf_ex))
705 			goto out_free_dentry;
706 	} else {
707 		/* Executables without an interpreter also need a personality  */
708 		SET_PERSONALITY(loc->elf_ex, 0);
709 	}
710 
711 	/* Flush all traces of the currently running executable */
712 	retval = flush_old_exec(bprm);
713 	if (retval)
714 		goto out_free_dentry;
715 
716 	/* OK, This is the point of no return */
717 	current->flags &= ~PF_FORKNOEXEC;
718 	current->mm->def_flags = def_flags;
719 
720 	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
721 	   may depend on the personality.  */
722 	SET_PERSONALITY(loc->elf_ex, 0);
723 	if (elf_read_implies_exec(loc->elf_ex, executable_stack))
724 		current->personality |= READ_IMPLIES_EXEC;
725 
726 	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
727 		current->flags |= PF_RANDOMIZE;
728 	arch_pick_mmap_layout(current->mm);
729 
730 	/* Do this so that we can load the interpreter, if need be.  We will
731 	   change some of these later */
732 	current->mm->free_area_cache = current->mm->mmap_base;
733 	current->mm->cached_hole_size = 0;
734 	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
735 				 executable_stack);
736 	if (retval < 0) {
737 		send_sig(SIGKILL, current, 0);
738 		goto out_free_dentry;
739 	}
740 
741 	current->mm->start_stack = bprm->p;
742 
743 	/* Now we do a little grungy work by mmaping the ELF image into
744 	   the correct location in memory. */
745 	for(i = 0, elf_ppnt = elf_phdata;
746 	    i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
747 		int elf_prot = 0, elf_flags;
748 		unsigned long k, vaddr;
749 
750 		if (elf_ppnt->p_type != PT_LOAD)
751 			continue;
752 
753 		if (unlikely (elf_brk > elf_bss)) {
754 			unsigned long nbyte;
755 
756 			/* There was a PT_LOAD segment with p_memsz > p_filesz
757 			   before this one. Map anonymous pages, if needed,
758 			   and clear the area.  */
759 			retval = set_brk (elf_bss + load_bias,
760 					  elf_brk + load_bias);
761 			if (retval) {
762 				send_sig(SIGKILL, current, 0);
763 				goto out_free_dentry;
764 			}
765 			nbyte = ELF_PAGEOFFSET(elf_bss);
766 			if (nbyte) {
767 				nbyte = ELF_MIN_ALIGN - nbyte;
768 				if (nbyte > elf_brk - elf_bss)
769 					nbyte = elf_brk - elf_bss;
770 				if (clear_user((void __user *)elf_bss +
771 							load_bias, nbyte)) {
772 					/*
773 					 * This bss-zeroing can fail if the ELF
774 					 * file specifies odd protections. So
775 					 * we don't check the return value
776 					 */
777 				}
778 			}
779 		}
780 
781 		if (elf_ppnt->p_flags & PF_R)
782 			elf_prot |= PROT_READ;
783 		if (elf_ppnt->p_flags & PF_W)
784 			elf_prot |= PROT_WRITE;
785 		if (elf_ppnt->p_flags & PF_X)
786 			elf_prot |= PROT_EXEC;
787 
788 		elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
789 
790 		vaddr = elf_ppnt->p_vaddr;
791 		if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
792 			elf_flags |= MAP_FIXED;
793 		} else if (loc->elf_ex.e_type == ET_DYN) {
794 			/* Try and get dynamic programs out of the way of the
795 			 * default mmap base, as well as whatever program they
796 			 * might try to exec.  This is because the brk will
797 			 * follow the loader, and is not movable.  */
798 #ifdef CONFIG_X86
799 			load_bias = 0;
800 #else
801 			load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
802 #endif
803 		}
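		/*
		 * load_bias is the delta added to every p_vaddr in the
		 * file: it stays 0 for ET_EXEC, while for ET_DYN it is
		 * seeded here and corrected below once the first
		 * segment has actually been mapped.
		 */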
804 
805 		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
806 				elf_prot, elf_flags, 0);
807 		if (BAD_ADDR(error)) {
808 			send_sig(SIGKILL, current, 0);
809 			retval = IS_ERR((void *)error) ?
810 				PTR_ERR((void*)error) : -EINVAL;
811 			goto out_free_dentry;
812 		}
813 
814 		if (!load_addr_set) {
815 			load_addr_set = 1;
816 			load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
817 			if (loc->elf_ex.e_type == ET_DYN) {
818 				load_bias += error -
819 				             ELF_PAGESTART(load_bias + vaddr);
820 				load_addr += load_bias;
821 				reloc_func_desc = load_bias;
822 			}
823 		}
824 		k = elf_ppnt->p_vaddr;
825 		if (k < start_code)
826 			start_code = k;
827 		if (start_data < k)
828 			start_data = k;
829 
830 		/*
831 		 * Check to see if the section's size will overflow the
832 		 * allowed task size. Note that p_filesz must always be
833 		 * <= p_memsz so it is only necessary to check p_memsz.
834 		 */
835 		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
836 		    elf_ppnt->p_memsz > TASK_SIZE ||
837 		    TASK_SIZE - elf_ppnt->p_memsz < k) {
838 			/* set_brk can never work. Avoid overflows. */
839 			send_sig(SIGKILL, current, 0);
840 			retval = -EINVAL;
841 			goto out_free_dentry;
842 		}
843 
844 		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
845 
846 		if (k > elf_bss)
847 			elf_bss = k;
848 		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
849 			end_code = k;
850 		if (end_data < k)
851 			end_data = k;
852 		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
853 		if (k > elf_brk)
854 			elf_brk = k;
855 	}
856 
857 	loc->elf_ex.e_entry += load_bias;
858 	elf_bss += load_bias;
859 	elf_brk += load_bias;
860 	start_code += load_bias;
861 	end_code += load_bias;
862 	start_data += load_bias;
863 	end_data += load_bias;
864 
865 	/* Calling set_brk effectively mmaps the pages that we need
866 	 * for the bss and break sections.  We must do this before
867 	 * mapping in the interpreter, to make sure it doesn't wind
868 	 * up getting placed where the bss needs to go.
869 	 */
870 	retval = set_brk(elf_bss, elf_brk);
871 	if (retval) {
872 		send_sig(SIGKILL, current, 0);
873 		goto out_free_dentry;
874 	}
875 	if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
876 		send_sig(SIGSEGV, current, 0);
877 		retval = -EFAULT; /* Nobody gets to see this, but.. */
878 		goto out_free_dentry;
879 	}
880 
881 	if (elf_interpreter) {
882 		unsigned long uninitialized_var(interp_map_addr);
883 
884 		elf_entry = load_elf_interp(&loc->interp_elf_ex,
885 					    interpreter,
886 					    &interp_map_addr,
887 					    load_bias);
888 		if (!IS_ERR((void *)elf_entry)) {
889 			/*
890 			 * load_elf_interp() returns relocation
891 			 * adjustment
892 			 */
893 			interp_load_addr = elf_entry;
894 			elf_entry += loc->interp_elf_ex.e_entry;
895 		}
896 		if (BAD_ADDR(elf_entry)) {
897 			force_sig(SIGSEGV, current);
898 			retval = IS_ERR((void *)elf_entry) ?
899 					(int)elf_entry : -EINVAL;
900 			goto out_free_dentry;
901 		}
902 		reloc_func_desc = interp_load_addr;
903 
904 		allow_write_access(interpreter);
905 		fput(interpreter);
906 		kfree(elf_interpreter);
907 	} else {
908 		elf_entry = loc->elf_ex.e_entry;
909 		if (BAD_ADDR(elf_entry)) {
910 			force_sig(SIGSEGV, current);
911 			retval = -EINVAL;
912 			goto out_free_dentry;
913 		}
914 	}
915 
916 	kfree(elf_phdata);
917 
918 	sys_close(elf_exec_fileno);
919 
920 	set_binfmt(&elf_format);
921 
922 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
923 	retval = arch_setup_additional_pages(bprm, executable_stack);
924 	if (retval < 0) {
925 		send_sig(SIGKILL, current, 0);
926 		goto out;
927 	}
928 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
929 
930 	compute_creds(bprm);
931 	current->flags &= ~PF_FORKNOEXEC;
932 	retval = create_elf_tables(bprm, &loc->elf_ex,
933 			  load_addr, interp_load_addr);
934 	if (retval < 0) {
935 		send_sig(SIGKILL, current, 0);
936 		goto out;
937 	}
938 	/* Record the new image's memory layout in mm. */
939 	current->mm->end_code = end_code;
940 	current->mm->start_code = start_code;
941 	current->mm->start_data = start_data;
942 	current->mm->end_data = end_data;
943 	current->mm->start_stack = bprm->p;
944 
945 #ifdef arch_randomize_brk
946 	if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1))
947 		current->mm->brk = current->mm->start_brk =
948 			arch_randomize_brk(current->mm);
949 #endif
950 
951 	if (current->personality & MMAP_PAGE_ZERO) {
952 		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
953 		   and some applications "depend" upon this behavior.
954 		   Since we do not have the power to recompile these, we
955 		   emulate the SVr4 behavior. Sigh. */
956 		down_write(&current->mm->mmap_sem);
957 		error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
958 				MAP_FIXED | MAP_PRIVATE, 0);
959 		up_write(&current->mm->mmap_sem);
960 	}
961 
962 #ifdef ELF_PLAT_INIT
963 	/*
964 	 * The ABI may specify that certain registers be set up in special
965 	 * ways (on i386 %edx is the address of a DT_FINI function, for
966 	 * example).  In addition, it may also specify (e.g. PowerPC64 ELF)
967 	 * that the e_entry field is the address of the function descriptor
968 	 * for the startup routine, rather than the address of the startup
969 	 * routine itself.  This macro performs whatever initialization to
970 	 * the regs structure is required as well as any relocations to the
971 	 * function descriptor entries when executing dynamically linked apps.
972 	 */
973 	ELF_PLAT_INIT(regs, reloc_func_desc);
974 #endif
975 
976 	start_thread(regs, elf_entry, bprm->p);
977 	if (unlikely(current->ptrace & PT_PTRACED)) {
978 		if (current->ptrace & PT_TRACE_EXEC)
979 			ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
980 		else
981 			send_sig(SIGTRAP, current, 0);
982 	}
983 	retval = 0;
984 out:
985 	kfree(loc);
986 out_ret:
987 	return retval;
988 
989 	/* error cleanup */
990 out_free_dentry:
991 	allow_write_access(interpreter);
992 	if (interpreter)
993 		fput(interpreter);
994 out_free_interp:
995 	kfree(elf_interpreter);
996 out_free_file:
997 	sys_close(elf_exec_fileno);
998 out_free_ph:
999 	kfree(elf_phdata);
1000 	goto out;
1001 }
1002 
1003 /* This is really simpleminded and specialized - we are loading an
1004    ELF-format library through the old a.out-era uselib() interface. */
1005 static int load_elf_library(struct file *file)
1006 {
1007 	struct elf_phdr *elf_phdata;
1008 	struct elf_phdr *eppnt;
1009 	unsigned long elf_bss, bss, len;
1010 	int retval, error, i, j;
1011 	struct elfhdr elf_ex;
1012 
1013 	error = -ENOEXEC;
1014 	retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1015 	if (retval != sizeof(elf_ex))
1016 		goto out;
1017 
1018 	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1019 		goto out;
1020 
1021 	/* First of all, some simple consistency checks */
1022 	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1023 	    !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
1024 		goto out;
1025 
1026 	/* Now read in all of the header information */
1027 
1028 	j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1029 	/* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1030 
1031 	error = -ENOMEM;
1032 	elf_phdata = kmalloc(j, GFP_KERNEL);
1033 	if (!elf_phdata)
1034 		goto out;
1035 
1036 	eppnt = elf_phdata;
1037 	error = -ENOEXEC;
1038 	retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1039 	if (retval != j)
1040 		goto out_free_ph;
1041 
1042 	for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
1043 		if ((eppnt + i)->p_type == PT_LOAD)
1044 			j++;
1045 	if (j != 1)
1046 		goto out_free_ph;
1047 
1048 	while (eppnt->p_type != PT_LOAD)
1049 		eppnt++;
1050 
1051 	/* Now use mmap to map the library into memory. */
1052 	down_write(&current->mm->mmap_sem);
1053 	error = do_mmap(file,
1054 			ELF_PAGESTART(eppnt->p_vaddr),
1055 			(eppnt->p_filesz +
1056 			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
1057 			PROT_READ | PROT_WRITE | PROT_EXEC,
1058 			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1059 			(eppnt->p_offset -
1060 			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
1061 	up_write(&current->mm->mmap_sem);
1062 	if (error != ELF_PAGESTART(eppnt->p_vaddr))
1063 		goto out_free_ph;
1064 
1065 	elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1066 	if (padzero(elf_bss)) {
1067 		error = -EFAULT;
1068 		goto out_free_ph;
1069 	}
1070 
1071 	len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1072 			    ELF_MIN_ALIGN - 1);
1073 	bss = eppnt->p_memsz + eppnt->p_vaddr;
1074 	if (bss > len) {
1075 		down_write(&current->mm->mmap_sem);
1076 		do_brk(len, bss - len);
1077 		up_write(&current->mm->mmap_sem);
1078 	}
1079 	error = 0;
1080 
1081 out_free_ph:
1082 	kfree(elf_phdata);
1083 out:
1084 	return error;
1085 }
1086 
1087 /*
1088  * Note that some platforms still use traditional core dumps and not
1089  * the ELF core dump.  Each platform can select it as appropriate.
1090  */
1091 #if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
1092 
1093 /*
1094  * ELF core dumper
1095  *
1096  * Modelled on fs/exec.c:aout_core_dump()
1097  * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1098  */
1099 /*
1100  * These are the only things you should do on a core-file: use only these
1101  * functions to write out all the necessary info.
1102  */
1103 static int dump_write(struct file *file, const void *addr, int nr)
1104 {
1105 	return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
1106 }
1107 
1108 static int dump_seek(struct file *file, loff_t off)
1109 {
1110 	if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
1111 		if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
1112 			return 0;
1113 	} else {
1114 		char *buf = (char *)get_zeroed_page(GFP_KERNEL);
1115 		if (!buf)
1116 			return 0;
1117 		while (off > 0) {
1118 			unsigned long n = (off > PAGE_SIZE) ? PAGE_SIZE : off;
1119 			if (!dump_write(file, buf, n)) {
1120 				free_page((unsigned long)buf);
1121 				return 0;
1122 			}
1123 			off -= n;
1124 		}
1125 		free_page((unsigned long)buf);
1126 	}
1127 	return 1;
1128 }
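/*
 * The zero-filled-write fallback above matters when the core file is
 * piped rather than seekable; the resulting contents are the same
 * either way (sparse holes vs. explicit zeroes).
 */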
1129 
1130 /*
1131  * Decide what to dump of a segment, part, all or none.
1132  */
1133 static unsigned long vma_dump_size(struct vm_area_struct *vma,
1134 				   unsigned long mm_flags)
1135 {
1136 	/* The vma can be set up to tell us the answer directly.  */
1137 	if (vma->vm_flags & VM_ALWAYSDUMP)
1138 		goto whole;
1139 
1140 	/* Do not dump I/O mapped devices or special mappings */
1141 	if (vma->vm_flags & (VM_IO | VM_RESERVED))
1142 		return 0;
1143 
1144 #define FILTER(type)	(mm_flags & (1UL << MMF_DUMP_##type))
1145 
1146 	/* By default, dump shared memory if mapped from an anonymous file. */
1147 	if (vma->vm_flags & VM_SHARED) {
1148 		if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ?
1149 		    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1150 			goto whole;
1151 		return 0;
1152 	}
1153 
1154 	/* Dump segments that have been written to.  */
1155 	if (vma->anon_vma && FILTER(ANON_PRIVATE))
1156 		goto whole;
1157 	if (vma->vm_file == NULL)
1158 		return 0;
1159 
1160 	if (FILTER(MAPPED_PRIVATE))
1161 		goto whole;
1162 
1163 	/*
1164 	 * If this looks like the beginning of a DSO or executable mapping,
1165 	 * check for an ELF header.  If we find one, dump the first page to
1166 	 * aid in determining what was mapped here.
1167 	 */
1168 	if (FILTER(ELF_HEADERS) && vma->vm_file != NULL && vma->vm_pgoff == 0) {
1169 		u32 __user *header = (u32 __user *) vma->vm_start;
1170 		u32 word;
1171 		/*
1172 		 * Doing it this way gets the constant folded by GCC.
1173 		 */
1174 		union {
1175 			u32 cmp;
1176 			char elfmag[SELFMAG];
1177 		} magic;
1178 		BUILD_BUG_ON(SELFMAG != sizeof word);
1179 		magic.elfmag[EI_MAG0] = ELFMAG0;
1180 		magic.elfmag[EI_MAG1] = ELFMAG1;
1181 		magic.elfmag[EI_MAG2] = ELFMAG2;
1182 		magic.elfmag[EI_MAG3] = ELFMAG3;
1183 		if (get_user(word, header) == 0 && word == magic.cmp)
1184 			return PAGE_SIZE;
1185 	}
1186 
1187 #undef	FILTER
1188 
1189 	return 0;
1190 
1191 whole:
1192 	return vma->vm_end - vma->vm_start;
1193 }
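/*
 * The MMF_DUMP_* bits tested by FILTER() are the ones user space
 * toggles through /proc/<pid>/coredump_filter.
 */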
1194 
1195 /* An ELF note in memory */
1196 struct memelfnote
1197 {
1198 	const char *name;
1199 	int type;
1200 	unsigned int datasz;
1201 	void *data;
1202 };
1203 
1204 static int notesize(struct memelfnote *en)
1205 {
1206 	int sz;
1207 
1208 	sz = sizeof(struct elf_note);
1209 	sz += roundup(strlen(en->name) + 1, 4);
1210 	sz += roundup(en->datasz, 4);
1211 
1212 	return sz;
1213 }
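/*
 * A note on disk is the elf_note header followed by the name and the
 * descriptor, each padded to a 4-byte boundary; e.g. the name "CORE"
 * (5 bytes with its NUL) occupies 8 bytes.
 */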
1214 
1215 #define DUMP_WRITE(addr, nr, foffset)	\
1216 	do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)
1217 
1218 static int alignfile(struct file *file, loff_t *foffset)
1219 {
1220 	static const char buf[4] = { 0, };
1221 	DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
1222 	return 1;
1223 }
1224 
1225 static int writenote(struct memelfnote *men, struct file *file,
1226 			loff_t *foffset)
1227 {
1228 	struct elf_note en;
1229 	en.n_namesz = strlen(men->name) + 1;
1230 	en.n_descsz = men->datasz;
1231 	en.n_type = men->type;
1232 
1233 	DUMP_WRITE(&en, sizeof(en), foffset);
1234 	DUMP_WRITE(men->name, en.n_namesz, foffset);
1235 	if (!alignfile(file, foffset))
1236 		return 0;
1237 	DUMP_WRITE(men->data, men->datasz, foffset);
1238 	if (!alignfile(file, foffset))
1239 		return 0;
1240 
1241 	return 1;
1242 }
1243 #undef DUMP_WRITE
1244 
1245 #define DUMP_WRITE(addr, nr)	\
1246 	if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
1247 		goto end_coredump;
1248 #define DUMP_SEEK(off)	\
1249 	if (!dump_seek(file, (off))) \
1250 		goto end_coredump;
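/*
 * Note that these two macros expand to bare if statements (no
 * do-while wrapper), so they are only safe where a plain statement
 * is; don't use them as the unbraced body of another conditional.
 */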
1251 
1252 static void fill_elf_header(struct elfhdr *elf, int segs,
1253 			    u16 machine, u32 flags, u8 osabi)
1254 {
1255 	memset(elf, 0, sizeof(*elf));
1256 
1257 	memcpy(elf->e_ident, ELFMAG, SELFMAG);
1258 	elf->e_ident[EI_CLASS] = ELF_CLASS;
1259 	elf->e_ident[EI_DATA] = ELF_DATA;
1260 	elf->e_ident[EI_VERSION] = EV_CURRENT;
1261 	elf->e_ident[EI_OSABI] = ELF_OSABI;
1262 
1263 	elf->e_type = ET_CORE;
1264 	elf->e_machine = machine;
1265 	elf->e_version = EV_CURRENT;
1266 	elf->e_phoff = sizeof(struct elfhdr);
1267 	elf->e_flags = flags;
1268 	elf->e_ehsize = sizeof(struct elfhdr);
1269 	elf->e_phentsize = sizeof(struct elf_phdr);
1270 	elf->e_phnum = segs;
1271 
1272 	return;
1273 }
1274 
1275 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1276 {
1277 	phdr->p_type = PT_NOTE;
1278 	phdr->p_offset = offset;
1279 	phdr->p_vaddr = 0;
1280 	phdr->p_paddr = 0;
1281 	phdr->p_filesz = sz;
1282 	phdr->p_memsz = 0;
1283 	phdr->p_flags = 0;
1284 	phdr->p_align = 0;
1285 	return;
1286 }
1287 
1288 static void fill_note(struct memelfnote *note, const char *name, int type,
1289 		unsigned int sz, void *data)
1290 {
1291 	note->name = name;
1292 	note->type = type;
1293 	note->datasz = sz;
1294 	note->data = data;
1295 	return;
1296 }
1297 
1298 /*
1299  * fill up all the fields in prstatus from the given task struct, except
1300  * registers which need to be filled up separately.
1301  */
1302 static void fill_prstatus(struct elf_prstatus *prstatus,
1303 		struct task_struct *p, long signr)
1304 {
1305 	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1306 	prstatus->pr_sigpend = p->pending.signal.sig[0];
1307 	prstatus->pr_sighold = p->blocked.sig[0];
1308 	prstatus->pr_pid = task_pid_vnr(p);
1309 	prstatus->pr_ppid = task_pid_vnr(p->real_parent);
1310 	prstatus->pr_pgrp = task_pgrp_vnr(p);
1311 	prstatus->pr_sid = task_session_vnr(p);
1312 	if (thread_group_leader(p)) {
1313 		/*
1314 		 * This is the record for the group leader.  Add in the
1315 		 * cumulative times of previous dead threads.  This total
1316 		 * won't include the time of each live thread whose state
1317 		 * is included in the core dump.  The final total reported
1318 		 * to our parent process when it calls wait4 will include
1319 		 * those sums as well as the little bit more time it takes
1320 		 * this and each other thread to finish dying after the
1321 		 * core dump synchronization phase.
1322 		 */
1323 		cputime_to_timeval(cputime_add(p->utime, p->signal->utime),
1324 				   &prstatus->pr_utime);
1325 		cputime_to_timeval(cputime_add(p->stime, p->signal->stime),
1326 				   &prstatus->pr_stime);
1327 	} else {
1328 		cputime_to_timeval(p->utime, &prstatus->pr_utime);
1329 		cputime_to_timeval(p->stime, &prstatus->pr_stime);
1330 	}
1331 	cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1332 	cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1333 }
1334 
1335 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1336 		       struct mm_struct *mm)
1337 {
1338 	unsigned int i, len;
1339 
1340 	/* first copy the parameters from user space */
1341 	memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1342 
1343 	len = mm->arg_end - mm->arg_start;
1344 	if (len >= ELF_PRARGSZ)
1345 		len = ELF_PRARGSZ-1;
1346 	if (copy_from_user(&psinfo->pr_psargs,
1347 		           (const char __user *)mm->arg_start, len))
1348 		return -EFAULT;
1349 	for(i = 0; i < len; i++)
1350 		if (psinfo->pr_psargs[i] == 0)
1351 			psinfo->pr_psargs[i] = ' ';
1352 	psinfo->pr_psargs[len] = 0;
1353 
1354 	psinfo->pr_pid = task_pid_vnr(p);
1355 	psinfo->pr_ppid = task_pid_vnr(p->real_parent);
1356 	psinfo->pr_pgrp = task_pgrp_vnr(p);
1357 	psinfo->pr_sid = task_session_vnr(p);
1358 
1359 	i = p->state ? ffz(~p->state) + 1 : 0;
1360 	psinfo->pr_state = i;
1361 	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1362 	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1363 	psinfo->pr_nice = task_nice(p);
1364 	psinfo->pr_flag = p->flags;
1365 	SET_UID(psinfo->pr_uid, p->uid);
1366 	SET_GID(psinfo->pr_gid, p->gid);
1367 	strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1368 
1369 	return 0;
1370 }
1371 
1372 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1373 {
1374 	elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1375 	int i = 0;
1376 	do
1377 		i += 2;
1378 	while (auxv[i - 2] != AT_NULL);
1379 	fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1380 }
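/*
 * saved_auxv holds (id, value) pairs, so the scan above advances two
 * entries at a time and sizes the note to include the terminating
 * AT_NULL pair.
 */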
1381 
1382 #ifdef CORE_DUMP_USE_REGSET
1383 #include <linux/regset.h>
1384 
1385 struct elf_thread_core_info {
1386 	struct elf_thread_core_info *next;
1387 	struct task_struct *task;
1388 	struct elf_prstatus prstatus;
1389 	struct memelfnote notes[0];
1390 };
1391 
1392 struct elf_note_info {
1393 	struct elf_thread_core_info *thread;
1394 	struct memelfnote psinfo;
1395 	struct memelfnote auxv;
1396 	size_t size;
1397 	int thread_notes;
1398 };
1399 
1400 /*
1401  * When a regset has a writeback hook, we call it on each thread before
1402  * dumping user memory.  On register window machines, this makes sure the
1403  * user memory backing the register data is up to date before we read it.
1404  */
1405 static void do_thread_regset_writeback(struct task_struct *task,
1406 				       const struct user_regset *regset)
1407 {
1408 	if (regset->writeback)
1409 		regset->writeback(task, regset, 1);
1410 }
1411 
1412 static int fill_thread_core_info(struct elf_thread_core_info *t,
1413 				 const struct user_regset_view *view,
1414 				 long signr, size_t *total)
1415 {
1416 	unsigned int i;
1417 
1418 	/*
1419 	 * NT_PRSTATUS is the one special case, because the regset data
1420 	 * goes into the pr_reg field inside the note contents, rather
1421 	 * than being the whole note contents.  We fill the rest in here.
1422 	 * We assume that regset 0 is NT_PRSTATUS.
1423 	 */
1424 	fill_prstatus(&t->prstatus, t->task, signr);
1425 	(void) view->regsets[0].get(t->task, &view->regsets[0],
1426 				    0, sizeof(t->prstatus.pr_reg),
1427 				    &t->prstatus.pr_reg, NULL);
1428 
1429 	fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1430 		  sizeof(t->prstatus), &t->prstatus);
1431 	*total += notesize(&t->notes[0]);
1432 
1433 	do_thread_regset_writeback(t->task, &view->regsets[0]);
1434 
1435 	/*
1436 	 * Each other regset might generate a note too.  For each regset
1437 	 * that has no core_note_type or is inactive, we leave t->notes[i]
1438 	 * all zero and we'll know to skip writing it later.
1439 	 */
1440 	for (i = 1; i < view->n; ++i) {
1441 		const struct user_regset *regset = &view->regsets[i];
1442 		do_thread_regset_writeback(t->task, regset);
1443 		if (regset->core_note_type &&
1444 		    (!regset->active || regset->active(t->task, regset))) {
1445 			int ret;
1446 			size_t size = regset->n * regset->size;
1447 			void *data = kmalloc(size, GFP_KERNEL);
1448 			if (unlikely(!data))
1449 				return 0;
1450 			ret = regset->get(t->task, regset,
1451 					  0, size, data, NULL);
1452 			if (unlikely(ret))
1453 				kfree(data);
1454 			else {
1455 				if (regset->core_note_type != NT_PRFPREG)
1456 					fill_note(&t->notes[i], "LINUX",
1457 						  regset->core_note_type,
1458 						  size, data);
1459 				else {
1460 					t->prstatus.pr_fpvalid = 1;
1461 					fill_note(&t->notes[i], "CORE",
1462 						  NT_PRFPREG, size, data);
1463 				}
1464 				*total += notesize(&t->notes[i]);
1465 			}
1466 		}
1467 	}
1468 
1469 	return 1;
1470 }
1471 
1472 static int fill_note_info(struct elfhdr *elf, int phdrs,
1473 			  struct elf_note_info *info,
1474 			  long signr, struct pt_regs *regs)
1475 {
1476 	struct task_struct *dump_task = current;
1477 	const struct user_regset_view *view = task_user_regset_view(dump_task);
1478 	struct elf_thread_core_info *t;
1479 	struct elf_prpsinfo *psinfo;
1480 	struct task_struct *g, *p;
1481 	unsigned int i;
1482 
1483 	info->size = 0;
1484 	info->thread = NULL;
1485 
1486 	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1487 	fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1488 
1489 	if (psinfo == NULL)
1490 		return 0;
1491 
1492 	/*
1493 	 * Figure out how many notes we're going to need for each thread.
1494 	 */
1495 	info->thread_notes = 0;
1496 	for (i = 0; i < view->n; ++i)
1497 		if (view->regsets[i].core_note_type != 0)
1498 			++info->thread_notes;
1499 
1500 	/*
1501 	 * Sanity check.  We rely on regset 0 being NT_PRSTATUS,
1502 	 * since it is our one special case.
1503 	 */
1504 	if (unlikely(info->thread_notes == 0) ||
1505 	    unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1506 		WARN_ON(1);
1507 		return 0;
1508 	}
1509 
1510 	/*
1511 	 * Initialize the ELF file header.
1512 	 */
1513 	fill_elf_header(elf, phdrs,
1514 			view->e_machine, view->e_flags, view->ei_osabi);
1515 
1516 	/*
1517 	 * Allocate a structure for each thread.
1518 	 */
1519 	rcu_read_lock();
1520 	do_each_thread(g, p)
1521 		if (p->mm == dump_task->mm) {
1522 			t = kzalloc(offsetof(struct elf_thread_core_info,
1523 					     notes[info->thread_notes]),
1524 				    GFP_ATOMIC);
1525 			if (unlikely(!t)) {
1526 				rcu_read_unlock();
1527 				return 0;
1528 			}
1529 			t->task = p;
1530 			if (p == dump_task || !info->thread) {
1531 				t->next = info->thread;
1532 				info->thread = t;
1533 			} else {
1534 				/*
1535 				 * Make sure to keep the original task at
1536 				 * the head of the list.
1537 				 */
1538 				t->next = info->thread->next;
1539 				info->thread->next = t;
1540 			}
1541 		}
1542 	while_each_thread(g, p);
1543 	rcu_read_unlock();
1544 
1545 	/*
1546 	 * Now fill in each thread's information.
1547 	 */
1548 	for (t = info->thread; t != NULL; t = t->next)
1549 		if (!fill_thread_core_info(t, view, signr, &info->size))
1550 			return 0;
1551 
1552 	/*
1553 	 * Fill in the two process-wide notes.
1554 	 */
1555 	fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1556 	info->size += notesize(&info->psinfo);
1557 
1558 	fill_auxv_note(&info->auxv, current->mm);
1559 	info->size += notesize(&info->auxv);
1560 
1561 	return 1;
1562 }
1563 
1564 static size_t get_note_info_size(struct elf_note_info *info)
1565 {
1566 	return info->size;
1567 }
1568 
1569 /*
1570  * Write all the notes for each thread.  When writing the first thread, the
1571  * process-wide notes are interleaved after the first thread-specific note.
1572  */
1573 static int write_note_info(struct elf_note_info *info,
1574 			   struct file *file, loff_t *foffset)
1575 {
1576 	bool first = true;
1577 	struct elf_thread_core_info *t = info->thread;
1578 
1579 	do {
1580 		int i;
1581 
1582 		if (!writenote(&t->notes[0], file, foffset))
1583 			return 0;
1584 
1585 		if (first && !writenote(&info->psinfo, file, foffset))
1586 			return 0;
1587 		if (first && !writenote(&info->auxv, file, foffset))
1588 			return 0;
1589 
1590 		for (i = 1; i < info->thread_notes; ++i)
1591 			if (t->notes[i].data &&
1592 			    !writenote(&t->notes[i], file, foffset))
1593 				return 0;
1594 
1595 		first = 0;
1596 		t = t->next;
1597 	} while (t);
1598 
1599 	return 1;
1600 }
1601 
1602 static void free_note_info(struct elf_note_info *info)
1603 {
1604 	struct elf_thread_core_info *threads = info->thread;
1605 	while (threads) {
1606 		unsigned int i;
1607 		struct elf_thread_core_info *t = threads;
1608 		threads = t->next;
1609 		WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1610 		for (i = 1; i < info->thread_notes; ++i)
1611 			kfree(t->notes[i].data);
1612 		kfree(t);
1613 	}
1614 	kfree(info->psinfo.data);
1615 }
1616 
1617 #else
1618 
1619 /* Here is the structure in which status of each thread is captured. */
1620 struct elf_thread_status
1621 {
1622 	struct list_head list;
1623 	struct elf_prstatus prstatus;	/* NT_PRSTATUS */
1624 	elf_fpregset_t fpu;		/* NT_PRFPREG */
1625 	struct task_struct *thread;
1626 #ifdef ELF_CORE_COPY_XFPREGS
1627 	elf_fpxregset_t xfpu;		/* ELF_CORE_XFPREG_TYPE */
1628 #endif
1629 	struct memelfnote notes[3];
1630 	int num_notes;
1631 };
1632 
1633 /*
1634  * In order to add the specific thread information for the elf file format,
1635  * we need to keep a linked list of every threads pr_status and then create
1636  * a single section for them in the final core file.
1637  */
1638 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1639 {
1640 	int sz = 0;
1641 	struct task_struct *p = t->thread;
1642 	t->num_notes = 0;
1643 
1644 	fill_prstatus(&t->prstatus, p, signr);
1645 	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1646 
1647 	fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1648 		  &(t->prstatus));
1649 	t->num_notes++;
1650 	sz += notesize(&t->notes[0]);
1651 
1652 	if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1653 								&t->fpu))) {
1654 		fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1655 			  &(t->fpu));
1656 		t->num_notes++;
1657 		sz += notesize(&t->notes[1]);
1658 	}
1659 
1660 #ifdef ELF_CORE_COPY_XFPREGS
1661 	if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1662 		fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1663 			  sizeof(t->xfpu), &t->xfpu);
1664 		t->num_notes++;
1665 		sz += notesize(&t->notes[2]);
1666 	}
1667 #endif
1668 	return sz;
1669 }
1670 
1671 struct elf_note_info {
1672 	struct memelfnote *notes;
1673 	struct elf_prstatus *prstatus;	/* NT_PRSTATUS */
1674 	struct elf_prpsinfo *psinfo;	/* NT_PRPSINFO */
1675 	struct list_head thread_list;
1676 	elf_fpregset_t *fpu;
1677 #ifdef ELF_CORE_COPY_XFPREGS
1678 	elf_fpxregset_t *xfpu;
1679 #endif
1680 	int thread_status_size;
1681 	int numnote;
1682 };
1683 
1684 static int fill_note_info(struct elfhdr *elf, int phdrs,
1685 			  struct elf_note_info *info,
1686 			  long signr, struct pt_regs *regs)
1687 {
1688 #define	NUM_NOTES	6
1689 	struct list_head *t;
1690 	struct task_struct *g, *p;
1691 
1692 	info->notes = NULL;
1693 	info->prstatus = NULL;
1694 	info->psinfo = NULL;
1695 	info->fpu = NULL;
1696 #ifdef ELF_CORE_COPY_XFPREGS
1697 	info->xfpu = NULL;
1698 #endif
1699 	INIT_LIST_HEAD(&info->thread_list);
1700 
1701 	info->notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote),
1702 			      GFP_KERNEL);
1703 	if (!info->notes)
1704 		return 0;
1705 	info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1706 	if (!info->psinfo)
1707 		return 0;
1708 	info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1709 	if (!info->prstatus)
1710 		return 0;
1711 	info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1712 	if (!info->fpu)
1713 		return 0;
1714 #ifdef ELF_CORE_COPY_XFPREGS
1715 	info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1716 	if (!info->xfpu)
1717 		return 0;
1718 #endif
1719 
1720 	info->thread_status_size = 0;
1721 	if (signr) {
1722 		struct elf_thread_status *ets;
1723 		rcu_read_lock();
1724 		do_each_thread(g, p)
1725 			if (current->mm == p->mm && current != p) {
1726 				ets = kzalloc(sizeof(*ets), GFP_ATOMIC);
1727 				if (!ets) {
1728 					rcu_read_unlock();
1729 					return 0;
1730 				}
1731 				ets->thread = p;
1732 				list_add(&ets->list, &info->thread_list);
1733 			}
1734 		while_each_thread(g, p);
1735 		rcu_read_unlock();
1736 		list_for_each(t, &info->thread_list) {
1737 			int sz;
1738 
1739 			ets = list_entry(t, struct elf_thread_status, list);
1740 			sz = elf_dump_thread_status(signr, ets);
1741 			info->thread_status_size += sz;
1742 		}
1743 	}
1744 	/* now collect the dump for the current */
1745 	memset(info->prstatus, 0, sizeof(*info->prstatus));
1746 	fill_prstatus(info->prstatus, current, signr);
1747 	elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1748 
1749 	/* Set up header */
1750 	fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS, ELF_OSABI);
1751 
1752 	/*
1753 	 * Set up the notes in similar form to SVR4 core dumps made
1754 	 * with info from their /proc.
1755 	 */
1756 
1757 	fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1758 		  sizeof(*info->prstatus), info->prstatus);
1759 	fill_psinfo(info->psinfo, current->group_leader, current->mm);
1760 	fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1761 		  sizeof(*info->psinfo), info->psinfo);
1762 
1763 	info->numnote = 2;
1764 
1765 	fill_auxv_note(&info->notes[info->numnote++], current->mm);
1766 
1767 	/* Try to dump the FPU. */
1768 	info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1769 							       info->fpu);
1770 	if (info->prstatus->pr_fpvalid)
1771 		fill_note(info->notes + info->numnote++,
1772 			  "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1773 #ifdef ELF_CORE_COPY_XFPREGS
1774 	if (elf_core_copy_task_xfpregs(current, info->xfpu))
1775 		fill_note(info->notes + info->numnote++,
1776 			  "LINUX", ELF_CORE_XFPREG_TYPE,
1777 			  sizeof(*info->xfpu), info->xfpu);
1778 #endif
1779 
1780 	return 1;
1781 
1782 #undef NUM_NOTES
1783 }
1784 
1785 static size_t get_note_info_size(struct elf_note_info *info)
1786 {
1787 	int sz = 0;
1788 	int i;
1789 
1790 	for (i = 0; i < info->numnote; i++)
1791 		sz += notesize(info->notes + i);
1792 
1793 	sz += info->thread_status_size;
1794 
1795 	return sz;
1796 }
1797 
1798 static int write_note_info(struct elf_note_info *info,
1799 			   struct file *file, loff_t *foffset)
1800 {
1801 	int i;
1802 	struct list_head *t;
1803 
1804 	for (i = 0; i < info->numnote; i++)
1805 		if (!writenote(info->notes + i, file, foffset))
1806 			return 0;
1807 
1808 	/* write out the thread status notes section */
1809 	list_for_each(t, &info->thread_list) {
1810 		struct elf_thread_status *tmp =
1811 				list_entry(t, struct elf_thread_status, list);
1812 
1813 		for (i = 0; i < tmp->num_notes; i++)
1814 			if (!writenote(&tmp->notes[i], file, foffset))
1815 				return 0;
1816 	}
1817 
1818 	return 1;
1819 }
1820 
1821 static void free_note_info(struct elf_note_info *info)
1822 {
1823 	while (!list_empty(&info->thread_list)) {
1824 		struct list_head *tmp = info->thread_list.next;
1825 		list_del(tmp);
1826 		kfree(list_entry(tmp, struct elf_thread_status, list));
1827 	}
1828 
1829 	kfree(info->prstatus);
1830 	kfree(info->psinfo);
1831 	kfree(info->notes);
1832 	kfree(info->fpu);
1833 #ifdef ELF_CORE_COPY_XFPREGS
1834 	kfree(info->xfpu);
1835 #endif
1836 }
1837 
1838 #endif
1839 
1840 static struct vm_area_struct *first_vma(struct task_struct *tsk,
1841 					struct vm_area_struct *gate_vma)
1842 {
1843 	struct vm_area_struct *ret = tsk->mm->mmap;
1844 
1845 	if (ret)
1846 		return ret;
1847 	return gate_vma;
1848 }
1849 /*
1850  * Helper function for iterating across a vma list.  It ensures that the caller
1851  * will visit `gate_vma' prior to terminating the search.
1852  */
1853 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1854 					struct vm_area_struct *gate_vma)
1855 {
1856 	struct vm_area_struct *ret;
1857 
1858 	ret = this_vma->vm_next;
1859 	if (ret)
1860 		return ret;
1861 	if (this_vma == gate_vma)
1862 		return NULL;
1863 	return gate_vma;
1864 }
1865 
1866 /*
1867  * Actual dumper
1868  *
1869  * This is a two-pass process; first we find the offsets of the bits,
1870  * and then they are actually written out.  If we run out of core limit
1871  * we just truncate.
1872  */
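/*
 * The resulting core file layout is: ELF header, program header table
 * (a PT_NOTE entry followed by one PT_LOAD per vma, plus any arch
 * extras), the note data, then the page-aligned contents of each
 * dumped vma.
 */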
1873 static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
1874 {
1875 	int has_dumped = 0;
1876 	mm_segment_t fs;
1877 	int segs;
1878 	size_t size = 0;
1879 	struct vm_area_struct *vma, *gate_vma;
1880 	struct elfhdr *elf = NULL;
1881 	loff_t offset = 0, dataoff, foffset;
1882 	unsigned long mm_flags;
1883 	struct elf_note_info info;
1884 
1885 	/*
1886 	 * We no longer stop all VM operations.
1887 	 *
1888 	 * This is because those processes that could possibly change map_count
1889 	 * or the mmap / vma pages are now blocked in do_exit on current
1890 	 * finishing this core dump.
1891 	 *
1892 	 * Only ptrace can touch these memory addresses, but it doesn't change
1893 	 * the map_count or the pages allocated. So no possibility of crashing
1894 	 * exists while dumping the mm->vm_next areas to the core file.
1895 	 */
1896 
1897 	/* alloc memory for large data structures: too large to be on stack */
1898 	elf = kmalloc(sizeof(*elf), GFP_KERNEL);
1899 	if (!elf)
1900 		goto out;
1901 
1902 	segs = current->mm->map_count;
1903 #ifdef ELF_CORE_EXTRA_PHDRS
1904 	segs += ELF_CORE_EXTRA_PHDRS;
1905 #endif
1906 
1907 	gate_vma = get_gate_vma(current);
1908 	if (gate_vma != NULL)
1909 		segs++;
1910 
1911 	/*
1912 	 * Collect all the non-memory information about the process for the
1913 	 * notes.  This also sets up the file header.
1914 	 */
1915 	if (!fill_note_info(elf, segs + 1, /* including notes section */
1916 			    &info, signr, regs))
1917 		goto cleanup;
1918 
1919 	has_dumped = 1;
1920 	current->flags |= PF_DUMPCORE;
1921 
1922 	fs = get_fs();
1923 	set_fs(KERNEL_DS);
1924 
1925 	DUMP_WRITE(elf, sizeof(*elf));
1926 	offset += sizeof(*elf);				/* Elf header */
1927 	offset += (segs + 1) * sizeof(struct elf_phdr); /* Program headers */
1928 	foffset = offset;
1929 
1930 	/* Write notes phdr entry */
1931 	{
1932 		struct elf_phdr phdr;
1933 		size_t sz = get_note_info_size(&info);
1934 
1935 		sz += elf_coredump_extra_notes_size();
1936 
1937 		fill_elf_note_phdr(&phdr, sz, offset);
1938 		offset += sz;
1939 		DUMP_WRITE(&phdr, sizeof(phdr));
1940 	}
1941 
1942 	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
1943 
1944 	/*
1945 	 * We must use the same mm->flags while dumping core to avoid
1946 	 * inconsistency between the program headers and bodies, otherwise an
1947 	 * unusable core file can be generated.
1948 	 */
1949 	mm_flags = current->mm->flags;
1950 
1951 	/* Write program headers for segments dump */
1952 	for (vma = first_vma(current, gate_vma); vma != NULL;
1953 			vma = next_vma(vma, gate_vma)) {
1954 		struct elf_phdr phdr;
1955 
1956 		phdr.p_type = PT_LOAD;
1957 		phdr.p_offset = offset;
1958 		phdr.p_vaddr = vma->vm_start;
1959 		phdr.p_paddr = 0;
1960 		phdr.p_filesz = vma_dump_size(vma, mm_flags);
1961 		phdr.p_memsz = vma->vm_end - vma->vm_start;
1962 		offset += phdr.p_filesz;
1963 		phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
1964 		if (vma->vm_flags & VM_WRITE)
1965 			phdr.p_flags |= PF_W;
1966 		if (vma->vm_flags & VM_EXEC)
1967 			phdr.p_flags |= PF_X;
1968 		phdr.p_align = ELF_EXEC_PAGESIZE;
1969 
1970 		DUMP_WRITE(&phdr, sizeof(phdr));
1971 	}
1972 
1973 #ifdef ELF_CORE_WRITE_EXTRA_PHDRS
1974 	ELF_CORE_WRITE_EXTRA_PHDRS;
1975 #endif
1976 
1977 	/* write out the notes section */
1978 	if (!write_note_info(&info, file, &foffset))
1979 		goto end_coredump;
1980 
1981 	if (elf_coredump_extra_notes_write(file, &foffset))
1982 		goto end_coredump;
1983 
1984 	/* Align to page */
1985 	DUMP_SEEK(dataoff - foffset);
1986 
1987 	for (vma = first_vma(current, gate_vma); vma != NULL;
1988 			vma = next_vma(vma, gate_vma)) {
1989 		unsigned long addr;
1990 		unsigned long end;
1991 
1992 		end = vma->vm_start + vma_dump_size(vma, mm_flags);
1993 
1994 		for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
1995 			struct page *page;
1996 			struct vm_area_struct *tmp_vma;
1997 
1998 			if (get_user_pages(current, current->mm, addr, 1, 0, 1,
1999 						&page, &tmp_vma) <= 0) {
2000 				DUMP_SEEK(PAGE_SIZE);
2001 			} else {
2002 				if (page == ZERO_PAGE(0)) {
2003 					if (!dump_seek(file, PAGE_SIZE)) {
2004 						page_cache_release(page);
2005 						goto end_coredump;
2006 					}
2007 				} else {
2008 					void *kaddr;
2009 					flush_cache_page(tmp_vma, addr,
2010 							 page_to_pfn(page));
2011 					kaddr = kmap(page);
2012 					if ((size += PAGE_SIZE) > limit ||
2013 					    !dump_write(file, kaddr,
2014 					    PAGE_SIZE)) {
2015 						kunmap(page);
2016 						page_cache_release(page);
2017 						goto end_coredump;
2018 					}
2019 					kunmap(page);
2020 				}
2021 				page_cache_release(page);
2022 			}
2023 		}
2024 	}
2025 
2026 #ifdef ELF_CORE_WRITE_EXTRA_DATA
2027 	ELF_CORE_WRITE_EXTRA_DATA;
2028 #endif
2029 
2030 end_coredump:
2031 	set_fs(fs);
2032 
2033 cleanup:
2034 	free_note_info(&info);
2035 	kfree(elf);
2036 out:
2037 	return has_dumped;
2038 }
2039 
2040 #endif		/* USE_ELF_CORE_DUMP */
2041 
2042 static int __init init_elf_binfmt(void)
2043 {
2044 	return register_binfmt(&elf_format);
2045 }
2046 
2047 static void __exit exit_elf_binfmt(void)
2048 {
2049 	/* Remove the ELF loader. */
2050 	unregister_binfmt(&elf_format);
2051 }
2052 
2053 core_initcall(init_elf_binfmt);
2054 module_exit(exit_elf_binfmt);
2055 MODULE_LICENSE("GPL");
2056