/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines.  Information on the format may be found in the book
 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
 * Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/stat.h>
#include <linux/time.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/fcntl.h>
#include <linux/ptrace.h>
#include <linux/slab.h>
#include <linux/shm.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/smp.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/utsname.h>
#include <asm/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>

static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
static int load_elf_library(struct file *);
static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
				int, int, unsigned long);

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
#else
#define elf_core_dump	NULL
#endif

#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN	PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS	0
#endif

#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
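
/*
 * Worked example (hypothetical address, assuming ELF_MIN_ALIGN == 0x1000):
 * ELF_PAGESTART(0x12345) == 0x12000, ELF_PAGEOFFSET(0x12345) == 0x345,
 * and ELF_PAGEALIGN(0x12345) == 0x13000.
 */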

static struct linux_binfmt elf_format = {
		.module		= THIS_MODULE,
		.load_binary	= load_elf_binary,
		.load_shlib	= load_elf_library,
		.core_dump	= elf_core_dump,
		.min_coredump	= ELF_EXEC_PAGESIZE,
		.hasvdso	= 1
};

#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)

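/*
 * set_brk() rounds both ends up to ELF_MIN_ALIGN and asks do_brk() for the
 * anonymous zero pages in between.  E.g. (hypothetical values, 4 KiB
 * alignment): start == 0x804a123 and end == 0x804c000 map the single page
 * range [0x804b000, 0x804c000).
 */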
static int set_brk(unsigned long start, unsigned long end)
{
	start = ELF_PAGEALIGN(start);
	end = ELF_PAGEALIGN(end);
	if (end > start) {
		unsigned long addr;
		down_write(&current->mm->mmap_sem);
		addr = do_brk(start, end - start);
		up_write(&current->mm->mmap_sem);
		if (BAD_ADDR(addr))
			return addr;
	}
	current->mm->start_brk = current->mm->brk = end;
	return 0;
}

/* We need to explicitly zero any fractional pages
   after the data section (i.e. bss).  These would
   otherwise contain junk from the file that should
   not be in memory.
 */
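/*
 * E.g. (hypothetical value, assuming ELF_MIN_ALIGN == 0x1000): elf_bss ==
 * 0x804a123 leaves ELF_PAGEOFFSET(elf_bss) == 0x123, so the 0xedd bytes
 * from 0x804a123 up to the 0x804b000 page boundary are cleared.
 */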
static int padzero(unsigned long elf_bss)
{
	unsigned long nbyte;

	nbyte = ELF_PAGEOFFSET(elf_bss);
	if (nbyte) {
		nbyte = ELF_MIN_ALIGN - nbyte;
		if (clear_user((void __user *) elf_bss, nbyte))
			return -EFAULT;
	}
	return 0;
}

/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
	elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
	old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
	(((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
#endif
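
/*
 * Illustration for the common grows-down case (hypothetical values):
 * with sp == 0x1000, STACK_ALLOC(sp, 0x10) moves sp down to 0xff0 and
 * yields that new, lower address; STACK_ROUND() then aligns the final
 * pointer down to a 16-byte boundary as the ABI expects.
 */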

static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
		unsigned long load_addr, unsigned long interp_load_addr)
{
	unsigned long p = bprm->p;
	int argc = bprm->argc;
	int envc = bprm->envc;
	elf_addr_t __user *argv;
	elf_addr_t __user *envp;
	elf_addr_t __user *sp;
	elf_addr_t __user *u_platform;
	const char *k_platform = ELF_PLATFORM;
	int items;
	elf_addr_t *elf_info;
	int ei_index = 0;
	struct task_struct *tsk = current;
	struct vm_area_struct *vma;

	/*
	 * In some cases (e.g. Hyper-Threading), we want to avoid L1
	 * evictions by the processes running on the same package. One
	 * thing we can do is to shuffle the initial stack for them.
	 */

	p = arch_align_stack(p);

	/*
	 * If this architecture has a platform capability string, copy it
	 * to userspace.  In some cases (Sparc), this info is impossible
	 * for userspace to get any other way, in others (i386) it is
	 * merely difficult.
	 */
	u_platform = NULL;
	if (k_platform) {
		size_t len = strlen(k_platform) + 1;

		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
		if (__copy_to_user(u_platform, k_platform, len))
			return -EFAULT;
	}

	/* Create the ELF interpreter info */
	elf_info = (elf_addr_t *)current->mm->saved_auxv;
	/* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
#define NEW_AUX_ENT(id, val) \
	do { \
		elf_info[ei_index++] = id; \
		elf_info[ei_index++] = val; \
	} while (0)
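
/*
 * Each NEW_AUX_ENT() appends one (id, value) pair, so saved_auxv ends up
 * laid out as, e.g.:
 *	{ AT_HWCAP, hwcap, AT_PAGESZ, 4096, ..., AT_NULL, 0 }
 * with the AT_NULL pair (supplied by the memset below) terminating the
 * vector.
 */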

#ifdef ARCH_DLINFO
	/*
	 * ARCH_DLINFO must come first so PPC can do its special alignment of
	 * AUXV.
	 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
	 * ARCH_DLINFO changes
	 */
	ARCH_DLINFO;
#endif
	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
	NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
	NEW_AUX_ENT(AT_BASE, interp_load_addr);
	NEW_AUX_ENT(AT_FLAGS, 0);
	NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
	NEW_AUX_ENT(AT_UID, tsk->uid);
	NEW_AUX_ENT(AT_EUID, tsk->euid);
	NEW_AUX_ENT(AT_GID, tsk->gid);
	NEW_AUX_ENT(AT_EGID, tsk->egid);
	NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
	NEW_AUX_ENT(AT_EXECFN, bprm->exec);
	if (k_platform) {
		NEW_AUX_ENT(AT_PLATFORM,
			    (elf_addr_t)(unsigned long)u_platform);
	}
	if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
		NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
	}
#undef NEW_AUX_ENT
	/* AT_NULL is zero; clear the rest too */
	memset(&elf_info[ei_index], 0,
	       sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);

	/* And advance past the AT_NULL entry.  */
	ei_index += 2;

	sp = STACK_ADD(p, ei_index);

	items = (argc + 1) + (envc + 1) + 1;
	bprm->p = STACK_ROUND(sp, items);

	/* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
	bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
	sp = (elf_addr_t __user *)bprm->p;
#endif

	/*
	 * Grow the stack manually; some architectures have a limit on how
	 * far ahead a user-space access may be in order to grow the stack.
	 */
	vma = find_extend_vma(current->mm, bprm->p);
	if (!vma)
		return -EFAULT;

	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
	if (__put_user(argc, sp++))
		return -EFAULT;
	argv = sp;
	envp = argv + argc + 1;
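
	/*
	 * The new userspace stack now ends up laid out as (low to high):
	 *
	 *	argc
	 *	argv[0] ... argv[argc-1], NULL
	 *	envp[0] ... envp[envc-1], NULL
	 *	auxv (id, value) pairs, terminated by AT_NULL
	 *
	 * which is why items above was (argc + 1) + (envc + 1) + 1.
	 */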

	/* Populate argv and envp */
	p = current->mm->arg_end = current->mm->arg_start;
	while (argc-- > 0) {
		size_t len;
		if (__put_user((elf_addr_t)p, argv++))
			return -EFAULT;
		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
		if (!len || len > MAX_ARG_STRLEN)
			return -EINVAL;
		p += len;
	}
	if (__put_user(0, argv))
		return -EFAULT;
	current->mm->arg_end = current->mm->env_start = p;
	while (envc-- > 0) {
		size_t len;
		if (__put_user((elf_addr_t)p, envp++))
			return -EFAULT;
		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
		if (!len || len > MAX_ARG_STRLEN)
			return -EINVAL;
		p += len;
	}
	if (__put_user(0, envp))
		return -EFAULT;
	current->mm->env_end = p;

	/* Put the elf_info on the stack in the right place.  */
	sp = (elf_addr_t __user *)envp + 1;
	if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
		return -EFAULT;
	return 0;
}

#ifndef elf_map

static unsigned long elf_map(struct file *filep, unsigned long addr,
		struct elf_phdr *eppnt, int prot, int type,
		unsigned long total_size)
{
	unsigned long map_addr;
	unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
	unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
	addr = ELF_PAGESTART(addr);
	size = ELF_PAGEALIGN(size);
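
	/*
	 * The rounding keeps file offset and virtual address congruent
	 * modulo the page size.  E.g. (hypothetical values, 4 KiB pages):
	 * p_vaddr == 0x8048100 and p_offset == 0x100 yield addr == 0x8048000
	 * and off == 0, with size grown by the 0x100 bytes of page offset.
	 */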

	/* mmap() will return -EINVAL if given a zero size, but a
	 * segment with zero filesize is perfectly valid */
	if (!size)
		return addr;

	down_write(&current->mm->mmap_sem);
	/*
	 * total_size is the size of the ELF (interpreter) image.
	 * The _first_ mmap needs to know the full size, otherwise
	 * randomization might put this image into an overlapping
	 * position with the ELF binary image (since size < total_size).
	 * So we first map the 'big' image and then unmap the remainder
	 * at the end; that unmap is needed for ELF images with holes.
	 */
	if (total_size) {
		total_size = ELF_PAGEALIGN(total_size);
		map_addr = do_mmap(filep, addr, total_size, prot, type, off);
		if (!BAD_ADDR(map_addr))
			do_munmap(current->mm, map_addr+size, total_size-size);
	} else
		map_addr = do_mmap(filep, addr, size, prot, type, off);

	up_write(&current->mm->mmap_sem);
	return(map_addr);
}

#endif /* !elf_map */

static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
{
	int i, first_idx = -1, last_idx = -1;

	for (i = 0; i < nr; i++) {
		if (cmds[i].p_type == PT_LOAD) {
			last_idx = i;
			if (first_idx == -1)
				first_idx = i;
		}
	}
	if (first_idx == -1)
		return 0;

	return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
				ELF_PAGESTART(cmds[first_idx].p_vaddr);
}
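
/*
 * E.g. (hypothetical program headers): PT_LOAD segments at p_vaddr 0x400000
 * (p_memsz 0x1000) and 0x600000 (p_memsz 0x2000) give a total mapping size
 * of 0x600000 + 0x2000 - 0x400000 == 0x202000.
 */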

/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header */

static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
		struct file *interpreter, unsigned long *interp_map_addr,
		unsigned long no_base)
{
	struct elf_phdr *elf_phdata;
	struct elf_phdr *eppnt;
	unsigned long load_addr = 0;
	int load_addr_set = 0;
	unsigned long last_bss = 0, elf_bss = 0;
	unsigned long error = ~0UL;
	unsigned long total_size;
	int retval, i, size;

	/* First of all, some simple consistency checks */
	if (interp_elf_ex->e_type != ET_EXEC &&
	    interp_elf_ex->e_type != ET_DYN)
		goto out;
	if (!elf_check_arch(interp_elf_ex))
		goto out;
	if (!interpreter->f_op || !interpreter->f_op->mmap)
		goto out;

	/*
	 * If the size of this structure has changed, then punt, since
	 * we will be doing the wrong thing.
	 */
	if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
		goto out;
	if (interp_elf_ex->e_phnum < 1 ||
		interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
		goto out;

	/* Now read in all of the header information */
	size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
	if (size > ELF_MIN_ALIGN)
		goto out;
	elf_phdata = kmalloc(size, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
			     (char *)elf_phdata, size);
	error = -EIO;
	if (retval != size) {
		if (retval < 0)
			error = retval;
		goto out_close;
	}

	total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
	if (!total_size) {
		error = -EINVAL;
		goto out_close;
	}

	eppnt = elf_phdata;
	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
		if (eppnt->p_type == PT_LOAD) {
			int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
			int elf_prot = 0;
			unsigned long vaddr = 0;
			unsigned long k, map_addr;

			if (eppnt->p_flags & PF_R)
				elf_prot = PROT_READ;
			if (eppnt->p_flags & PF_W)
				elf_prot |= PROT_WRITE;
			if (eppnt->p_flags & PF_X)
				elf_prot |= PROT_EXEC;
			vaddr = eppnt->p_vaddr;
			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
				elf_type |= MAP_FIXED;
			else if (no_base && interp_elf_ex->e_type == ET_DYN)
				load_addr = -vaddr;

			map_addr = elf_map(interpreter, load_addr + vaddr,
					eppnt, elf_prot, elf_type, total_size);
			total_size = 0;
			if (!*interp_map_addr)
				*interp_map_addr = map_addr;
			error = map_addr;
			if (BAD_ADDR(map_addr))
				goto out_close;

			if (!load_addr_set &&
			    interp_elf_ex->e_type == ET_DYN) {
				load_addr = map_addr - ELF_PAGESTART(vaddr);
				load_addr_set = 1;
			}
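
			/*
			 * E.g. (hypothetically): an ET_DYN interpreter
			 * linked at vaddr 0 that gets mapped at 0xb7f00000
			 * leaves load_addr == 0xb7f00000, and the remaining
			 * segments are placed MAP_FIXED relative to it.
			 */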

			/*
			 * Check to see if the section's size will overflow the
			 * allowed task size. Note that p_filesz must always be
			 * <= p_memsz so it's only necessary to check p_memsz.
			 */
			k = load_addr + eppnt->p_vaddr;
			if (BAD_ADDR(k) ||
			    eppnt->p_filesz > eppnt->p_memsz ||
			    eppnt->p_memsz > TASK_SIZE ||
			    TASK_SIZE - eppnt->p_memsz < k) {
				error = -ENOMEM;
				goto out_close;
			}

			/*
			 * Find the end of the file mapping for this phdr, and
			 * keep track of the largest address we see for this.
			 */
			k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
			if (k > elf_bss)
				elf_bss = k;

			/*
			 * Do the same thing for the memory mapping - between
			 * elf_bss and last_bss is the bss section.
			 */
			k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
			if (k > last_bss)
				last_bss = k;
		}
	}

	/*
	 * Now fill out the bss section.  First pad the last page up
	 * to the page boundary, and then perform a mmap to make sure
	 * that there are zero-mapped pages up to and including the
	 * last bss page.
	 */
	if (padzero(elf_bss)) {
		error = -EFAULT;
		goto out_close;
	}

	/* What we have mapped so far */
	elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);

	/* Map the last of the bss segment */
	if (last_bss > elf_bss) {
		down_write(&current->mm->mmap_sem);
		error = do_brk(elf_bss, last_bss - elf_bss);
		up_write(&current->mm->mmap_sem);
		if (BAD_ADDR(error))
			goto out_close;
	}

	error = load_addr;

out_close:
	kfree(elf_phdata);
out:
	return error;
}

/*
 * These are the functions used to load ELF style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
 */

#define INTERPRETER_NONE 0
#define INTERPRETER_ELF 2

#ifndef STACK_RND_MASK
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))	/* 8MB of VA */
#endif

static unsigned long randomize_stack_top(unsigned long stack_top)
{
	unsigned int random_variable = 0;

	if ((current->flags & PF_RANDOMIZE) &&
		!(current->personality & ADDR_NO_RANDOMIZE)) {
		random_variable = get_random_int() & STACK_RND_MASK;
		random_variable <<= PAGE_SHIFT;
	}
#ifdef CONFIG_STACK_GROWSUP
	return PAGE_ALIGN(stack_top) + random_variable;
#else
	return PAGE_ALIGN(stack_top) - random_variable;
#endif
}
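
/*
 * With the generic STACK_RND_MASK above and 4 KiB pages (hypothetically),
 * random_variable can be up to 0x7ff << 12 == 8 MiB - 4 KiB, so the stack
 * top moves by just under 8 MiB of virtual address space.
 */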

static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
{
	struct file *interpreter = NULL; /* to shut gcc up */
	unsigned long load_addr = 0, load_bias = 0;
	int load_addr_set = 0;
	char * elf_interpreter = NULL;
	unsigned long error;
	struct elf_phdr *elf_ppnt, *elf_phdata;
	unsigned long elf_bss, elf_brk;
	int elf_exec_fileno;
	int retval, i;
	unsigned int size;
	unsigned long elf_entry;
	unsigned long interp_load_addr = 0;
	unsigned long start_code, end_code, start_data, end_data;
	unsigned long reloc_func_desc = 0;
	int executable_stack = EXSTACK_DEFAULT;
	unsigned long def_flags = 0;
	struct {
		struct elfhdr elf_ex;
		struct elfhdr interp_elf_ex;
	} *loc;

	loc = kmalloc(sizeof(*loc), GFP_KERNEL);
	if (!loc) {
		retval = -ENOMEM;
		goto out_ret;
	}

	/* Get the exec-header */
	loc->elf_ex = *((struct elfhdr *)bprm->buf);

	retval = -ENOEXEC;
	/* First of all, some simple consistency checks */
	if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
		goto out;

	if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
		goto out;
	if (!elf_check_arch(&loc->elf_ex))
		goto out;
	if (!bprm->file->f_op || !bprm->file->f_op->mmap)
		goto out;

	/* Now read in all of the header information */
	if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
		goto out;
	if (loc->elf_ex.e_phnum < 1 ||
		loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
		goto out;
	size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
	retval = -ENOMEM;
	elf_phdata = kmalloc(size, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
			     (char *)elf_phdata, size);
	if (retval != size) {
		if (retval >= 0)
			retval = -EIO;
		goto out_free_ph;
	}

	retval = get_unused_fd();
	if (retval < 0)
		goto out_free_ph;
	get_file(bprm->file);
	fd_install(elf_exec_fileno = retval, bprm->file);

	elf_ppnt = elf_phdata;
	elf_bss = 0;
	elf_brk = 0;

	start_code = ~0UL;
	end_code = 0;
	start_data = 0;
	end_data = 0;

	for (i = 0; i < loc->elf_ex.e_phnum; i++) {
		if (elf_ppnt->p_type == PT_INTERP) {
			/* This segment names the program interpreter
			 * used to load shared libraries; read out its
			 * path so we can open it below.
			 */
			retval = -ENOEXEC;
			if (elf_ppnt->p_filesz > PATH_MAX ||
			    elf_ppnt->p_filesz < 2)
				goto out_free_file;

			retval = -ENOMEM;
			elf_interpreter = kmalloc(elf_ppnt->p_filesz,
						  GFP_KERNEL);
			if (!elf_interpreter)
				goto out_free_file;

			retval = kernel_read(bprm->file, elf_ppnt->p_offset,
					     elf_interpreter,
					     elf_ppnt->p_filesz);
			if (retval != elf_ppnt->p_filesz) {
				if (retval >= 0)
					retval = -EIO;
				goto out_free_interp;
			}
			/* make sure path is NULL terminated */
			retval = -ENOEXEC;
			if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
				goto out_free_interp;

			/*
			 * The early SET_PERSONALITY here is so that the lookup
			 * for the interpreter happens in the namespace of the
			 * to-be-execed image.  SET_PERSONALITY can select an
			 * alternate root.
			 *
			 * However, SET_PERSONALITY is NOT allowed to switch
			 * this task into the new image's memory mapping
			 * policy - that is, TASK_SIZE must still evaluate to
			 * that which is appropriate to the execing application.
			 * This is because exit_mmap() needs to have TASK_SIZE
			 * evaluate to the size of the old image.
			 *
			 * So if (say) a 64-bit application is execing a 32-bit
			 * application it is the architecture's responsibility
			 * to defer changing the value of TASK_SIZE until the
			 * switch really is going to happen - do this in
			 * flush_thread().	- akpm
			 */
			SET_PERSONALITY(loc->elf_ex, 0);

			interpreter = open_exec(elf_interpreter);
			retval = PTR_ERR(interpreter);
			if (IS_ERR(interpreter))
				goto out_free_interp;

			/*
			 * If the binary is not readable then enforce
			 * mm->dumpable = 0 regardless of the interpreter's
			 * permissions.
			 */
			if (file_permission(interpreter, MAY_READ) < 0)
				bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;

			retval = kernel_read(interpreter, 0, bprm->buf,
					     BINPRM_BUF_SIZE);
			if (retval != BINPRM_BUF_SIZE) {
				if (retval >= 0)
					retval = -EIO;
				goto out_free_dentry;
			}

			/* Get the exec headers */
			loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
			break;
		}
		elf_ppnt++;
	}

	elf_ppnt = elf_phdata;
	for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
		if (elf_ppnt->p_type == PT_GNU_STACK) {
			if (elf_ppnt->p_flags & PF_X)
				executable_stack = EXSTACK_ENABLE_X;
			else
				executable_stack = EXSTACK_DISABLE_X;
			break;
		}

	/* Some simple consistency checks for the interpreter */
	if (elf_interpreter) {
		retval = -ELIBBAD;
		/* Not an ELF interpreter */
		if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
			goto out_free_dentry;
		/* Verify the interpreter has a valid arch */
		if (!elf_check_arch(&loc->interp_elf_ex))
			goto out_free_dentry;
	} else {
		/* Executables without an interpreter also need a personality */
		SET_PERSONALITY(loc->elf_ex, 0);
	}

	/* Flush all traces of the currently running executable */
	retval = flush_old_exec(bprm);
	if (retval)
		goto out_free_dentry;

	/* OK, This is the point of no return */
	current->flags &= ~PF_FORKNOEXEC;
	current->mm->def_flags = def_flags;

	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
	   may depend on the personality.  */
	SET_PERSONALITY(loc->elf_ex, 0);
	if (elf_read_implies_exec(loc->elf_ex, executable_stack))
		current->personality |= READ_IMPLIES_EXEC;

	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
		current->flags |= PF_RANDOMIZE;
	arch_pick_mmap_layout(current->mm);

	/* Do this so that we can load the interpreter, if need be.  We will
	   change some of these later */
	current->mm->free_area_cache = current->mm->mmap_base;
	current->mm->cached_hole_size = 0;
	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
				 executable_stack);
	if (retval < 0) {
		send_sig(SIGKILL, current, 0);
		goto out_free_dentry;
	}

	current->mm->start_stack = bprm->p;

	/* Now we do a little grungy work by mmaping the ELF image into
	   the correct location in memory. */
	for (i = 0, elf_ppnt = elf_phdata;
	    i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
		int elf_prot = 0, elf_flags;
		unsigned long k, vaddr;

		if (elf_ppnt->p_type != PT_LOAD)
			continue;

		if (unlikely(elf_brk > elf_bss)) {
			unsigned long nbyte;

			/* There was a PT_LOAD segment with p_memsz > p_filesz
			   before this one. Map anonymous pages, if needed,
			   and clear the area.  */
			retval = set_brk(elf_bss + load_bias,
					 elf_brk + load_bias);
			if (retval) {
				send_sig(SIGKILL, current, 0);
				goto out_free_dentry;
			}
			nbyte = ELF_PAGEOFFSET(elf_bss);
			if (nbyte) {
				nbyte = ELF_MIN_ALIGN - nbyte;
				if (nbyte > elf_brk - elf_bss)
					nbyte = elf_brk - elf_bss;
				if (clear_user((void __user *)elf_bss +
							load_bias, nbyte)) {
					/*
					 * This bss-zeroing can fail if the ELF
					 * file specifies odd protections. So
					 * we don't check the return value
					 */
				}
			}
		}

		if (elf_ppnt->p_flags & PF_R)
			elf_prot |= PROT_READ;
		if (elf_ppnt->p_flags & PF_W)
			elf_prot |= PROT_WRITE;
		if (elf_ppnt->p_flags & PF_X)
			elf_prot |= PROT_EXEC;

		elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;

		vaddr = elf_ppnt->p_vaddr;
		if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
			elf_flags |= MAP_FIXED;
		} else if (loc->elf_ex.e_type == ET_DYN) {
			/* Try and get dynamic programs out of the way of the
			 * default mmap base, as well as whatever program they
			 * might try to exec.  This is because the brk will
			 * follow the loader, and is not movable.  */
#ifdef CONFIG_X86
			load_bias = 0;
#else
			load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#endif
		}

		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
				elf_prot, elf_flags, 0);
		if (BAD_ADDR(error)) {
			send_sig(SIGKILL, current, 0);
			retval = IS_ERR((void *)error) ?
				PTR_ERR((void*)error) : -EINVAL;
			goto out_free_dentry;
		}

		if (!load_addr_set) {
			load_addr_set = 1;
			load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
			if (loc->elf_ex.e_type == ET_DYN) {
				load_bias += error -
				             ELF_PAGESTART(load_bias + vaddr);
				load_addr += load_bias;
				reloc_func_desc = load_bias;
			}
		}
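
		/*
		 * E.g. (hypothetically) for ET_DYN with a requested vaddr
		 * of 0 actually mapped at 0xb7e00000: load_bias becomes
		 * 0xb7e00000, and e_entry/elf_bss/elf_brk below are all
		 * relocated by that same bias.
		 */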
		k = elf_ppnt->p_vaddr;
		if (k < start_code)
			start_code = k;
		if (start_data < k)
			start_data = k;

		/*
		 * Check to see if the section's size will overflow the
		 * allowed task size. Note that p_filesz must always be
		 * <= p_memsz so it is only necessary to check p_memsz.
		 */
		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
		    elf_ppnt->p_memsz > TASK_SIZE ||
		    TASK_SIZE - elf_ppnt->p_memsz < k) {
			/* set_brk can never work. Avoid overflows. */
			send_sig(SIGKILL, current, 0);
			retval = -EINVAL;
			goto out_free_dentry;
		}

		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

		if (k > elf_bss)
			elf_bss = k;
		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
			end_code = k;
		if (end_data < k)
			end_data = k;
		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
		if (k > elf_brk)
			elf_brk = k;
	}

	loc->elf_ex.e_entry += load_bias;
	elf_bss += load_bias;
	elf_brk += load_bias;
	start_code += load_bias;
	end_code += load_bias;
	start_data += load_bias;
	end_data += load_bias;

	/* Calling set_brk effectively mmaps the pages that we need
	 * for the bss and break sections.  We must do this before
	 * mapping in the interpreter, to make sure it doesn't wind
	 * up getting placed where the bss needs to go.
	 */
	retval = set_brk(elf_bss, elf_brk);
	if (retval) {
		send_sig(SIGKILL, current, 0);
		goto out_free_dentry;
	}
	if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
		send_sig(SIGSEGV, current, 0);
		retval = -EFAULT; /* Nobody gets to see this, but.. */
		goto out_free_dentry;
	}

	if (elf_interpreter) {
		unsigned long uninitialized_var(interp_map_addr);

		elf_entry = load_elf_interp(&loc->interp_elf_ex,
					    interpreter,
					    &interp_map_addr,
					    load_bias);
		if (!IS_ERR((void *)elf_entry)) {
			/*
			 * load_elf_interp() returns relocation
			 * adjustment
			 */
			interp_load_addr = elf_entry;
			elf_entry += loc->interp_elf_ex.e_entry;
		}
		if (BAD_ADDR(elf_entry)) {
			force_sig(SIGSEGV, current);
			retval = IS_ERR((void *)elf_entry) ?
					(int)elf_entry : -EINVAL;
			goto out_free_dentry;
		}
		reloc_func_desc = interp_load_addr;

		allow_write_access(interpreter);
		fput(interpreter);
		kfree(elf_interpreter);
	} else {
		elf_entry = loc->elf_ex.e_entry;
		if (BAD_ADDR(elf_entry)) {
			force_sig(SIGSEGV, current);
			retval = -EINVAL;
			goto out_free_dentry;
		}
	}

	kfree(elf_phdata);

	sys_close(elf_exec_fileno);

	set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
	retval = arch_setup_additional_pages(bprm, executable_stack);
	if (retval < 0) {
		send_sig(SIGKILL, current, 0);
		goto out;
	}
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

	compute_creds(bprm);
	current->flags &= ~PF_FORKNOEXEC;
	retval = create_elf_tables(bprm, &loc->elf_ex,
			  load_addr, interp_load_addr);
	if (retval < 0) {
		send_sig(SIGKILL, current, 0);
		goto out;
	}
	/* N.B. passed_fileno might not be initialized? */
	current->mm->end_code = end_code;
	current->mm->start_code = start_code;
	current->mm->start_data = start_data;
	current->mm->end_data = end_data;
	current->mm->start_stack = bprm->p;

#ifdef arch_randomize_brk
	if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1))
		current->mm->brk = current->mm->start_brk =
			arch_randomize_brk(current->mm);
#endif

	if (current->personality & MMAP_PAGE_ZERO) {
		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
		   and some applications "depend" upon this behavior.
		   Since we do not have the power to recompile these, we
		   emulate the SVr4 behavior. Sigh. */
		down_write(&current->mm->mmap_sem);
		error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
				MAP_FIXED | MAP_PRIVATE, 0);
		up_write(&current->mm->mmap_sem);
	}

#ifdef ELF_PLAT_INIT
	/*
	 * The ABI may specify that certain registers be set up in special
	 * ways (on i386 %edx is the address of a DT_FINI function, for
	 * example).  In addition, it may also specify (e.g. PowerPC64 ELF)
	 * that the e_entry field is the address of the function descriptor
	 * for the startup routine, rather than the address of the startup
	 * routine itself.  This macro performs whatever initialization of
	 * the regs structure is required, as well as any relocations to the
	 * function descriptor entries when executing dynamically linked apps.
	 */
	ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

	start_thread(regs, elf_entry, bprm->p);
	if (unlikely(current->ptrace & PT_PTRACED)) {
		if (current->ptrace & PT_TRACE_EXEC)
			ptrace_notify((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
		else
			send_sig(SIGTRAP, current, 0);
	}
	retval = 0;
out:
	kfree(loc);
out_ret:
	return retval;

	/* error cleanup */
out_free_dentry:
	allow_write_access(interpreter);
	if (interpreter)
		fput(interpreter);
out_free_interp:
	kfree(elf_interpreter);
out_free_file:
	sys_close(elf_exec_fileno);
out_free_ph:
	kfree(elf_phdata);
	goto out;
}

/* This is really simpleminded and specialized - we are loading an
   a.out library that is given an ELF header. */
static int load_elf_library(struct file *file)
{
	struct elf_phdr *elf_phdata;
	struct elf_phdr *eppnt;
	unsigned long elf_bss, bss, len;
	int retval, error, i, j;
	struct elfhdr elf_ex;

	error = -ENOEXEC;
	retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
	if (retval != sizeof(elf_ex))
		goto out;

	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
		goto out;

	/* First of all, some simple consistency checks */
	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
	    !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
		goto out;

	/* Now read in all of the header information */

	j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
	/* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

	error = -ENOMEM;
	elf_phdata = kmalloc(j, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	eppnt = elf_phdata;
	error = -ENOEXEC;
	retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
	if (retval != j)
		goto out_free_ph;

	for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
		if ((eppnt + i)->p_type == PT_LOAD)
			j++;
	if (j != 1)
		goto out_free_ph;

	while (eppnt->p_type != PT_LOAD)
		eppnt++;

	/* Now use mmap to map the library into memory. */
	down_write(&current->mm->mmap_sem);
	error = do_mmap(file,
			ELF_PAGESTART(eppnt->p_vaddr),
			(eppnt->p_filesz +
			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
			PROT_READ | PROT_WRITE | PROT_EXEC,
			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
			(eppnt->p_offset -
			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
	up_write(&current->mm->mmap_sem);
	if (error != ELF_PAGESTART(eppnt->p_vaddr))
		goto out_free_ph;

	elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
	if (padzero(elf_bss)) {
		error = -EFAULT;
		goto out_free_ph;
	}

	len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
			    ELF_MIN_ALIGN - 1);
	bss = eppnt->p_memsz + eppnt->p_vaddr;
	if (bss > len) {
		down_write(&current->mm->mmap_sem);
		do_brk(len, bss - len);
		up_write(&current->mm->mmap_sem);
	}
	error = 0;

out_free_ph:
	kfree(elf_phdata);
out:
	return error;
}

/*
 * Note that some platforms still use traditional core dumps and not
 * the ELF core dump.  Each platform can select it as appropriate.
 */
#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)

/*
 * ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */
/*
 * These are the only things you should do on a core-file: use only these
 * functions to write out all the necessary info.
 */
static int dump_write(struct file *file, const void *addr, int nr)
{
	return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
}

static int dump_seek(struct file *file, loff_t off)
{
	if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
		if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
			return 0;
	} else {
		char *buf = (char *)get_zeroed_page(GFP_KERNEL);
		if (!buf)
			return 0;
		while (off > 0) {
			unsigned long n = off;
			if (n > PAGE_SIZE)
				n = PAGE_SIZE;
			if (!dump_write(file, buf, n)) {
				/* don't leak the zeroed page on a failed write */
				free_page((unsigned long)buf);
				return 0;
			}
			off -= n;
		}
		free_page((unsigned long)buf);
	}
	return 1;
}

/*
 * Decide what to dump of a segment: part, all, or none.
 */
static unsigned long vma_dump_size(struct vm_area_struct *vma,
				   unsigned long mm_flags)
{
	/* The vma can be set up to tell us the answer directly.  */
	if (vma->vm_flags & VM_ALWAYSDUMP)
		goto whole;

	/* Do not dump I/O mapped devices or special mappings */
	if (vma->vm_flags & (VM_IO | VM_RESERVED))
		return 0;

#define FILTER(type)	(mm_flags & (1UL << MMF_DUMP_##type))
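
/*
 * mm_flags carries the MMF_DUMP_* bits that userspace configures through
 * /proc/<pid>/coredump_filter, so e.g. FILTER(ANON_PRIVATE) tests whether
 * private anonymous memory should be dumped.
 */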

	/* By default, dump shared memory if mapped from an anonymous file. */
	if (vma->vm_flags & VM_SHARED) {
		if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ?
		    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
			goto whole;
		return 0;
	}

	/* Dump segments that have been written to.  */
	if (vma->anon_vma && FILTER(ANON_PRIVATE))
		goto whole;
	if (vma->vm_file == NULL)
		return 0;

	if (FILTER(MAPPED_PRIVATE))
		goto whole;

	/*
	 * If this looks like the beginning of a DSO or executable mapping,
	 * check for an ELF header.  If we find one, dump the first page to
	 * aid in determining what was mapped here.
	 */
	if (FILTER(ELF_HEADERS) && vma->vm_file != NULL && vma->vm_pgoff == 0) {
		u32 __user *header = (u32 __user *) vma->vm_start;
		u32 word;
		/*
		 * Doing it this way gets the constant folded by GCC.
		 */
		union {
			u32 cmp;
			char elfmag[SELFMAG];
		} magic;
		BUILD_BUG_ON(SELFMAG != sizeof word);
		magic.elfmag[EI_MAG0] = ELFMAG0;
		magic.elfmag[EI_MAG1] = ELFMAG1;
		magic.elfmag[EI_MAG2] = ELFMAG2;
		magic.elfmag[EI_MAG3] = ELFMAG3;
		if (get_user(word, header) == 0 && word == magic.cmp)
			return PAGE_SIZE;
	}

#undef	FILTER

	return 0;

whole:
	return vma->vm_end - vma->vm_start;
}

/* An ELF note in memory */
struct memelfnote
{
	const char *name;
	int type;
	unsigned int datasz;
	void *data;
};

static int notesize(struct memelfnote *en)
{
	int sz;

	sz = sizeof(struct elf_note);
	sz += roundup(strlen(en->name) + 1, 4);
	sz += roundup(en->datasz, 4);

	return sz;
}
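
/*
 * E.g. a note named "CORE" (5 bytes including the NUL, padded to 8)
 * carrying a hypothetical 336-byte descriptor takes 12 + 8 + 336 == 356
 * bytes: the elf_note header uses 32-bit words in both ELF classes.
 */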

#define DUMP_WRITE(addr, nr, foffset)	\
	do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)

static int alignfile(struct file *file, loff_t *foffset)
{
	static const char buf[4] = { 0, };
	DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
	return 1;
}

static int writenote(struct memelfnote *men, struct file *file,
			loff_t *foffset)
{
	struct elf_note en;
	en.n_namesz = strlen(men->name) + 1;
	en.n_descsz = men->datasz;
	en.n_type = men->type;

	DUMP_WRITE(&en, sizeof(en), foffset);
	DUMP_WRITE(men->name, en.n_namesz, foffset);
	if (!alignfile(file, foffset))
		return 0;
	DUMP_WRITE(men->data, men->datasz, foffset);
	if (!alignfile(file, foffset))
		return 0;

	return 1;
}
#undef DUMP_WRITE

#define DUMP_WRITE(addr, nr)	\
	if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
		goto end_coredump;
#define DUMP_SEEK(off)	\
	if (!dump_seek(file, (off))) \
		goto end_coredump;

static void fill_elf_header(struct elfhdr *elf, int segs,
			    u16 machine, u32 flags, u8 osabi)
{
	memset(elf, 0, sizeof(*elf));

	memcpy(elf->e_ident, ELFMAG, SELFMAG);
	elf->e_ident[EI_CLASS] = ELF_CLASS;
	elf->e_ident[EI_DATA] = ELF_DATA;
	elf->e_ident[EI_VERSION] = EV_CURRENT;
	elf->e_ident[EI_OSABI] = ELF_OSABI;

	elf->e_type = ET_CORE;
	elf->e_machine = machine;
	elf->e_version = EV_CURRENT;
	elf->e_phoff = sizeof(struct elfhdr);
	elf->e_flags = flags;
	elf->e_ehsize = sizeof(struct elfhdr);
	elf->e_phentsize = sizeof(struct elf_phdr);
	elf->e_phnum = segs;

	return;
}

static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
{
	phdr->p_type = PT_NOTE;
	phdr->p_offset = offset;
	phdr->p_vaddr = 0;
	phdr->p_paddr = 0;
	phdr->p_filesz = sz;
	phdr->p_memsz = 0;
	phdr->p_flags = 0;
	phdr->p_align = 0;
	return;
}

static void fill_note(struct memelfnote *note, const char *name, int type,
		unsigned int sz, void *data)
{
	note->name = name;
	note->type = type;
	note->datasz = sz;
	note->data = data;
	return;
}

/*
 * fill up all the fields in prstatus from the given task struct, except
 * registers which need to be filled up separately.
 */
static void fill_prstatus(struct elf_prstatus *prstatus,
		struct task_struct *p, long signr)
{
	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
	prstatus->pr_sigpend = p->pending.signal.sig[0];
	prstatus->pr_sighold = p->blocked.sig[0];
	prstatus->pr_pid = task_pid_vnr(p);
	prstatus->pr_ppid = task_pid_vnr(p->real_parent);
	prstatus->pr_pgrp = task_pgrp_vnr(p);
	prstatus->pr_sid = task_session_vnr(p);
	if (thread_group_leader(p)) {
		/*
		 * This is the record for the group leader.  Add in the
		 * cumulative times of previous dead threads.  This total
		 * won't include the time of each live thread whose state
		 * is included in the core dump.  The final total reported
		 * to our parent process when it calls wait4 will include
		 * those sums as well as the little bit more time it takes
		 * this and each other thread to finish dying after the
		 * core dump synchronization phase.
		 */
		cputime_to_timeval(cputime_add(p->utime, p->signal->utime),
				   &prstatus->pr_utime);
		cputime_to_timeval(cputime_add(p->stime, p->signal->stime),
				   &prstatus->pr_stime);
	} else {
		cputime_to_timeval(p->utime, &prstatus->pr_utime);
		cputime_to_timeval(p->stime, &prstatus->pr_stime);
	}
	cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
	cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
}

static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
		       struct mm_struct *mm)
{
	unsigned int i, len;

	/* first copy the parameters from user space */
	memset(psinfo, 0, sizeof(struct elf_prpsinfo));

	len = mm->arg_end - mm->arg_start;
	if (len >= ELF_PRARGSZ)
		len = ELF_PRARGSZ - 1;
	if (copy_from_user(&psinfo->pr_psargs,
		           (const char __user *)mm->arg_start, len))
		return -EFAULT;
	for (i = 0; i < len; i++)
		if (psinfo->pr_psargs[i] == 0)
			psinfo->pr_psargs[i] = ' ';
	psinfo->pr_psargs[len] = 0;

	psinfo->pr_pid = task_pid_vnr(p);
	psinfo->pr_ppid = task_pid_vnr(p->real_parent);
	psinfo->pr_pgrp = task_pgrp_vnr(p);
	psinfo->pr_sid = task_session_vnr(p);

	i = p->state ? ffz(~p->state) + 1 : 0;
	psinfo->pr_state = i;
	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
	psinfo->pr_nice = task_nice(p);
	psinfo->pr_flag = p->flags;
	SET_UID(psinfo->pr_uid, p->uid);
	SET_GID(psinfo->pr_gid, p->gid);
	strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));

	return 0;
}

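/*
 * The auxv note simply snapshots mm->saved_auxv as filled in by
 * create_elf_tables(); the loop below counts (id, value) pairs up to and
 * including the AT_NULL terminator.
 */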
static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
{
	elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
	int i = 0;
	do
		i += 2;
	while (auxv[i - 2] != AT_NULL);
	fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
}

#ifdef CORE_DUMP_USE_REGSET
#include <linux/regset.h>

struct elf_thread_core_info {
	struct elf_thread_core_info *next;
	struct task_struct *task;
	struct elf_prstatus prstatus;
	struct memelfnote notes[0];
};

struct elf_note_info {
	struct elf_thread_core_info *thread;
	struct memelfnote psinfo;
	struct memelfnote auxv;
	size_t size;
	int thread_notes;
};

/*
 * When a regset has a writeback hook, we call it on each thread before
 * dumping user memory.  On register window machines, this makes sure the
 * user memory backing the register data is up to date before we read it.
 */
static void do_thread_regset_writeback(struct task_struct *task,
				       const struct user_regset *regset)
{
	if (regset->writeback)
		regset->writeback(task, regset, 1);
}

static int fill_thread_core_info(struct elf_thread_core_info *t,
				 const struct user_regset_view *view,
				 long signr, size_t *total)
{
	unsigned int i;

	/*
	 * NT_PRSTATUS is the one special case, because the regset data
	 * goes into the pr_reg field inside the note contents, rather
	 * than being the whole note contents.  We fill the regset in here.
	 * We assume that regset 0 is NT_PRSTATUS.
	 */
	fill_prstatus(&t->prstatus, t->task, signr);
	(void) view->regsets[0].get(t->task, &view->regsets[0],
				    0, sizeof(t->prstatus.pr_reg),
				    &t->prstatus.pr_reg, NULL);

	fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
		  sizeof(t->prstatus), &t->prstatus);
	*total += notesize(&t->notes[0]);

	do_thread_regset_writeback(t->task, &view->regsets[0]);

	/*
	 * Each other regset might generate a note too.  For each regset
	 * that has no core_note_type or is inactive, we leave t->notes[i]
	 * all zero and we'll know to skip writing it later.
	 */
	for (i = 1; i < view->n; ++i) {
		const struct user_regset *regset = &view->regsets[i];
		do_thread_regset_writeback(t->task, regset);
		if (regset->core_note_type &&
		    (!regset->active || regset->active(t->task, regset))) {
			int ret;
			size_t size = regset->n * regset->size;
			void *data = kmalloc(size, GFP_KERNEL);
			if (unlikely(!data))
				return 0;
			ret = regset->get(t->task, regset,
					  0, size, data, NULL);
			if (unlikely(ret))
				kfree(data);
			else {
				if (regset->core_note_type != NT_PRFPREG)
					fill_note(&t->notes[i], "LINUX",
						  regset->core_note_type,
						  size, data);
				else {
					t->prstatus.pr_fpvalid = 1;
					fill_note(&t->notes[i], "CORE",
						  NT_PRFPREG, size, data);
				}
				*total += notesize(&t->notes[i]);
			}
		}
	}

	return 1;
}

static int fill_note_info(struct elfhdr *elf, int phdrs,
			  struct elf_note_info *info,
			  long signr, struct pt_regs *regs)
{
	struct task_struct *dump_task = current;
	const struct user_regset_view *view = task_user_regset_view(dump_task);
	struct elf_thread_core_info *t;
	struct elf_prpsinfo *psinfo;
	struct task_struct *g, *p;
	unsigned int i;

	info->size = 0;
	info->thread = NULL;

	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
	fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);

	if (psinfo == NULL)
		return 0;

	/*
	 * Figure out how many notes we're going to need for each thread.
	 */
	info->thread_notes = 0;
	for (i = 0; i < view->n; ++i)
		if (view->regsets[i].core_note_type != 0)
			++info->thread_notes;

	/*
	 * Sanity check.  We rely on regset 0 being NT_PRSTATUS,
	 * since it is our one special case.
	 */
	if (unlikely(info->thread_notes == 0) ||
	    unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
		WARN_ON(1);
		return 0;
	}

	/*
	 * Initialize the ELF file header.
	 */
	fill_elf_header(elf, phdrs,
			view->e_machine, view->e_flags, view->ei_osabi);

	/*
	 * Allocate a structure for each thread.
	 */
	rcu_read_lock();
	do_each_thread(g, p)
		if (p->mm == dump_task->mm) {
			t = kzalloc(offsetof(struct elf_thread_core_info,
					     notes[info->thread_notes]),
				    GFP_ATOMIC);
			if (unlikely(!t)) {
				rcu_read_unlock();
				return 0;
			}
			t->task = p;
			if (p == dump_task || !info->thread) {
				t->next = info->thread;
				info->thread = t;
			} else {
				/*
				 * Make sure to keep the original task at
				 * the head of the list.
				 */
				t->next = info->thread->next;
				info->thread->next = t;
			}
		}
	while_each_thread(g, p);
	rcu_read_unlock();

	/*
	 * Now fill in each thread's information.
	 */
	for (t = info->thread; t != NULL; t = t->next)
		if (!fill_thread_core_info(t, view, signr, &info->size))
			return 0;

	/*
	 * Fill in the two process-wide notes.
	 */
	fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
	info->size += notesize(&info->psinfo);

	fill_auxv_note(&info->auxv, current->mm);
	info->size += notesize(&info->auxv);

	return 1;
}

static size_t get_note_info_size(struct elf_note_info *info)
{
	return info->size;
}

/*
 * Write all the notes for each thread.  When writing the first thread, the
 * process-wide notes are interleaved after the first thread-specific note.
 */
static int write_note_info(struct elf_note_info *info,
			   struct file *file, loff_t *foffset)
{
	bool first = 1;
	struct elf_thread_core_info *t = info->thread;

	do {
		int i;

		if (!writenote(&t->notes[0], file, foffset))
			return 0;

		if (first && !writenote(&info->psinfo, file, foffset))
			return 0;
		if (first && !writenote(&info->auxv, file, foffset))
			return 0;

		for (i = 1; i < info->thread_notes; ++i)
			if (t->notes[i].data &&
			    !writenote(&t->notes[i], file, foffset))
				return 0;

		first = 0;
		t = t->next;
	} while (t);

	return 1;
}

static void free_note_info(struct elf_note_info *info)
{
	struct elf_thread_core_info *threads = info->thread;
	while (threads) {
		unsigned int i;
		struct elf_thread_core_info *t = threads;
		threads = t->next;
		WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
		for (i = 1; i < info->thread_notes; ++i)
			kfree(t->notes[i].data);
		kfree(t);
	}
	kfree(info->psinfo.data);
}

#else

/* Here is the structure in which status of each thread is captured. */
struct elf_thread_status
{
	struct list_head list;
	struct elf_prstatus prstatus;	/* NT_PRSTATUS */
	elf_fpregset_t fpu;		/* NT_PRFPREG */
	struct task_struct *thread;
#ifdef ELF_CORE_COPY_XFPREGS
	elf_fpxregset_t xfpu;		/* ELF_CORE_XFPREG_TYPE */
#endif
	struct memelfnote notes[3];
	int num_notes;
};

/*
 * In order to add the specific thread information for the ELF file format,
 * we need to keep a linked list of every thread's pr_status and then create
 * a single section for them in the final core file.
 */
static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
{
	int sz = 0;
	struct task_struct *p = t->thread;
	t->num_notes = 0;

	fill_prstatus(&t->prstatus, p, signr);
	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);

	fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
		  &(t->prstatus));
	t->num_notes++;
	sz += notesize(&t->notes[0]);

	if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
								&t->fpu))) {
		fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
			  &(t->fpu));
		t->num_notes++;
		sz += notesize(&t->notes[1]);
	}

#ifdef ELF_CORE_COPY_XFPREGS
	if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
		fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
			  sizeof(t->xfpu), &t->xfpu);
		t->num_notes++;
		sz += notesize(&t->notes[2]);
	}
#endif
	return sz;
}

struct elf_note_info {
	struct memelfnote *notes;
	struct elf_prstatus *prstatus;	/* NT_PRSTATUS */
	struct elf_prpsinfo *psinfo;	/* NT_PRPSINFO */
	struct list_head thread_list;
	elf_fpregset_t *fpu;
#ifdef ELF_CORE_COPY_XFPREGS
	elf_fpxregset_t *xfpu;
#endif
	int thread_status_size;
	int numnote;
};

static int fill_note_info(struct elfhdr *elf, int phdrs,
			  struct elf_note_info *info,
			  long signr, struct pt_regs *regs)
{
#define	NUM_NOTES	6
	struct list_head *t;
	struct task_struct *g, *p;

	info->notes = NULL;
	info->prstatus = NULL;
	info->psinfo = NULL;
	info->fpu = NULL;
#ifdef ELF_CORE_COPY_XFPREGS
	info->xfpu = NULL;
#endif
	INIT_LIST_HEAD(&info->thread_list);

	info->notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote),
			      GFP_KERNEL);
	if (!info->notes)
		return 0;
	info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
	if (!info->psinfo)
		return 0;
	info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
	if (!info->prstatus)
		return 0;
	info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
	if (!info->fpu)
		return 0;
#ifdef ELF_CORE_COPY_XFPREGS
	info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
	if (!info->xfpu)
		return 0;
#endif

	info->thread_status_size = 0;
	if (signr) {
		struct elf_thread_status *ets;
		rcu_read_lock();
		do_each_thread(g, p)
			if (current->mm == p->mm && current != p) {
				ets = kzalloc(sizeof(*ets), GFP_ATOMIC);
				if (!ets) {
					rcu_read_unlock();
					return 0;
				}
				ets->thread = p;
				list_add(&ets->list, &info->thread_list);
			}
		while_each_thread(g, p);
		rcu_read_unlock();
		list_for_each(t, &info->thread_list) {
			int sz;

			ets = list_entry(t, struct elf_thread_status, list);
			sz = elf_dump_thread_status(signr, ets);
			info->thread_status_size += sz;
		}
	}
	/* now collect the dump for the current task */
	memset(info->prstatus, 0, sizeof(*info->prstatus));
	fill_prstatus(info->prstatus, current, signr);
	elf_core_copy_regs(&info->prstatus->pr_reg, regs);

	/* Set up header */
	fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS, ELF_OSABI);

	/*
	 * Set up the notes in similar form to SVR4 core dumps made
	 * with info from their /proc.
	 */

	fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
		  sizeof(*info->prstatus), info->prstatus);
	fill_psinfo(info->psinfo, current->group_leader, current->mm);
	fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
		  sizeof(*info->psinfo), info->psinfo);

	info->numnote = 2;

	fill_auxv_note(&info->notes[info->numnote++], current->mm);

	/* Try to dump the FPU. */
	info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
							       info->fpu);
	if (info->prstatus->pr_fpvalid)
		fill_note(info->notes + info->numnote++,
			  "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
#ifdef ELF_CORE_COPY_XFPREGS
	if (elf_core_copy_task_xfpregs(current, info->xfpu))
		fill_note(info->notes + info->numnote++,
			  "LINUX", ELF_CORE_XFPREG_TYPE,
			  sizeof(*info->xfpu), info->xfpu);
#endif

	return 1;

#undef NUM_NOTES
}

static size_t get_note_info_size(struct elf_note_info *info)
{
	int sz = 0;
	int i;

	for (i = 0; i < info->numnote; i++)
		sz += notesize(info->notes + i);

	sz += info->thread_status_size;

	return sz;
}

static int write_note_info(struct elf_note_info *info,
			   struct file *file, loff_t *foffset)
{
	int i;
	struct list_head *t;

	for (i = 0; i < info->numnote; i++)
		if (!writenote(info->notes + i, file, foffset))
			return 0;

	/* write out the thread status notes section */
	list_for_each(t, &info->thread_list) {
		struct elf_thread_status *tmp =
				list_entry(t, struct elf_thread_status, list);

		for (i = 0; i < tmp->num_notes; i++)
			if (!writenote(&tmp->notes[i], file, foffset))
				return 0;
	}

	return 1;
}

static void free_note_info(struct elf_note_info *info)
{
	while (!list_empty(&info->thread_list)) {
		struct list_head *tmp = info->thread_list.next;
		list_del(tmp);
		kfree(list_entry(tmp, struct elf_thread_status, list));
	}

	kfree(info->prstatus);
	kfree(info->psinfo);
	kfree(info->notes);
	kfree(info->fpu);
#ifdef ELF_CORE_COPY_XFPREGS
	kfree(info->xfpu);
#endif
}

#endif

static struct vm_area_struct *first_vma(struct task_struct *tsk,
					struct vm_area_struct *gate_vma)
{
	struct vm_area_struct *ret = tsk->mm->mmap;

	if (ret)
		return ret;
	return gate_vma;
}

/*
 * Helper function for iterating across a vma list.  It ensures that the caller
 * will visit `gate_vma' prior to terminating the search.
 */
static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
					struct vm_area_struct *gate_vma)
{
	struct vm_area_struct *ret;

	ret = this_vma->vm_next;
	if (ret)
		return ret;
	if (this_vma == gate_vma)
		return NULL;
	return gate_vma;
}

/*
 * Actual dumper
 *
 * This is a two-pass process; first we find the offsets of the bits,
 * and then they are actually written out.  If we run out of core limit
 * we just truncate.
 */
static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
{
	int has_dumped = 0;
	mm_segment_t fs;
	int segs;
	size_t size = 0;
	struct vm_area_struct *vma, *gate_vma;
	struct elfhdr *elf = NULL;
	loff_t offset = 0, dataoff, foffset;
	unsigned long mm_flags;
	struct elf_note_info info;

	/*
	 * We no longer stop all VM operations.
	 *
	 * This is because those processes that could possibly change map_count
	 * or the mmap / vma pages are now blocked in do_exit on current
	 * finishing this core dump.
	 *
	 * Only ptrace can touch these memory addresses, but it doesn't change
	 * the map_count or the pages allocated. So no possibility of crashing
	 * exists while dumping the mm->vm_next areas to the core file.
	 */

	/* alloc memory for large data structures: too large to be on stack */
	elf = kmalloc(sizeof(*elf), GFP_KERNEL);
	if (!elf)
		goto out;

	segs = current->mm->map_count;
#ifdef ELF_CORE_EXTRA_PHDRS
	segs += ELF_CORE_EXTRA_PHDRS;
#endif

	gate_vma = get_gate_vma(current);
	if (gate_vma != NULL)
		segs++;

	/*
	 * Collect all the non-memory information about the process for the
	 * notes.  This also sets up the file header.
	 */
	if (!fill_note_info(elf, segs + 1, /* including notes section */
			    &info, signr, regs))
		goto cleanup;

	has_dumped = 1;
	current->flags |= PF_DUMPCORE;

	fs = get_fs();
	set_fs(KERNEL_DS);

	DUMP_WRITE(elf, sizeof(*elf));
	offset += sizeof(*elf);				/* Elf header */
	offset += (segs + 1) * sizeof(struct elf_phdr); /* Program headers */
	foffset = offset;
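
	/*
	 * At this point the core file layout is: ELF header, then
	 * (segs + 1) program headers (the extra one describes the notes),
	 * then the notes themselves, and finally the page-aligned segment
	 * data starting at dataoff below.
	 */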

	/* Write notes phdr entry */
	{
		struct elf_phdr phdr;
		size_t sz = get_note_info_size(&info);

		sz += elf_coredump_extra_notes_size();

		fill_elf_note_phdr(&phdr, sz, offset);
		offset += sz;
		DUMP_WRITE(&phdr, sizeof(phdr));
	}

	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);

	/*
	 * We must use the same mm->flags while dumping core to avoid
	 * inconsistency between the program headers and bodies, otherwise an
	 * unusable core file can be generated.
	 */
	mm_flags = current->mm->flags;

	/* Write program headers for segments dump */
	for (vma = first_vma(current, gate_vma); vma != NULL;
			vma = next_vma(vma, gate_vma)) {
		struct elf_phdr phdr;

		phdr.p_type = PT_LOAD;
		phdr.p_offset = offset;
		phdr.p_vaddr = vma->vm_start;
		phdr.p_paddr = 0;
		phdr.p_filesz = vma_dump_size(vma, mm_flags);
		phdr.p_memsz = vma->vm_end - vma->vm_start;
		offset += phdr.p_filesz;
		phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
		if (vma->vm_flags & VM_WRITE)
			phdr.p_flags |= PF_W;
		if (vma->vm_flags & VM_EXEC)
			phdr.p_flags |= PF_X;
		phdr.p_align = ELF_EXEC_PAGESIZE;

		DUMP_WRITE(&phdr, sizeof(phdr));
	}

#ifdef ELF_CORE_WRITE_EXTRA_PHDRS
	ELF_CORE_WRITE_EXTRA_PHDRS;
#endif

	/* write out the notes section */
	if (!write_note_info(&info, file, &foffset))
		goto end_coredump;

	if (elf_coredump_extra_notes_write(file, &foffset))
		goto end_coredump;

	/* Align to page */
	DUMP_SEEK(dataoff - foffset);

	for (vma = first_vma(current, gate_vma); vma != NULL;
			vma = next_vma(vma, gate_vma)) {
		unsigned long addr;
		unsigned long end;

		end = vma->vm_start + vma_dump_size(vma, mm_flags);

		for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
			struct page *page;
			struct vm_area_struct *tmp_vma;

			if (get_user_pages(current, current->mm, addr, 1, 0, 1,
						&page, &tmp_vma) <= 0) {
				DUMP_SEEK(PAGE_SIZE);
			} else {
				if (page == ZERO_PAGE(0)) {
					if (!dump_seek(file, PAGE_SIZE)) {
						page_cache_release(page);
						goto end_coredump;
					}
				} else {
					void *kaddr;
					flush_cache_page(tmp_vma, addr,
							 page_to_pfn(page));
					kaddr = kmap(page);
					if ((size += PAGE_SIZE) > limit ||
					    !dump_write(file, kaddr,
					    PAGE_SIZE)) {
						kunmap(page);
						page_cache_release(page);
						goto end_coredump;
					}
					kunmap(page);
				}
				page_cache_release(page);
			}
		}
	}

#ifdef ELF_CORE_WRITE_EXTRA_DATA
	ELF_CORE_WRITE_EXTRA_DATA;
#endif

end_coredump:
	set_fs(fs);

cleanup:
	free_note_info(&info);
	kfree(elf);
out:
	return has_dumped;
}

#endif		/* USE_ELF_CORE_DUMP */

static int __init init_elf_binfmt(void)
{
	return register_binfmt(&elf_format);
}

static void __exit exit_elf_binfmt(void)
{
	/* Remove the ELF loader. */
	unregister_binfmt(&elf_format);
}

core_initcall(init_elf_binfmt);
module_exit(exit_elf_binfmt);
MODULE_LICENSE("GPL");