/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines.  Information on the format may be found in the book
 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
 * Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/stat.h>
#include <linux/time.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/a.out.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/fcntl.h>
#include <linux/ptrace.h>
#include <linux/slab.h>
#include <linux/shm.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/smp.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/utsname.h>
#include <asm/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>

static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
static int load_elf_library(struct file *);
static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
				int, int, unsigned long);

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
#else
#define elf_core_dump	NULL
#endif

#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN	PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS	0
#endif

#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
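
/*
 * Worked example, assuming ELF_MIN_ALIGN is 4096 (0x1000):
 *
 *	ELF_PAGESTART(0x08048123)  == 0x08048000	round down to a page
 *	ELF_PAGEOFFSET(0x08048123) == 0x123		offset within the page
 *	ELF_PAGEALIGN(0x08048123)  == 0x08049000	round up to a page
 */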

static struct linux_binfmt elf_format = {
		.module		= THIS_MODULE,
		.load_binary	= load_elf_binary,
		.load_shlib	= load_elf_library,
		.core_dump	= elf_core_dump,
		.min_coredump	= ELF_EXEC_PAGESIZE,
		.hasvdso	= 1
};

#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)

static int set_brk(unsigned long start, unsigned long end)
{
	start = ELF_PAGEALIGN(start);
	end = ELF_PAGEALIGN(end);
	if (end > start) {
		unsigned long addr;
		down_write(&current->mm->mmap_sem);
		addr = do_brk(start, end - start);
		up_write(&current->mm->mmap_sem);
		if (BAD_ADDR(addr))
			return addr;
	}
	current->mm->start_brk = current->mm->brk = end;
	return 0;
}

/* We need to explicitly zero any fractional pages
   after the data section (i.e. bss).  These would
   otherwise contain junk from the file that should not
   be in memory
 */
static int padzero(unsigned long elf_bss)
{
	unsigned long nbyte;

	nbyte = ELF_PAGEOFFSET(elf_bss);
	if (nbyte) {
		nbyte = ELF_MIN_ALIGN - nbyte;
		if (clear_user((void __user *) elf_bss, nbyte))
			return -EFAULT;
	}
	return 0;
}
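
/*
 * Example: with ELF_MIN_ALIGN at 4096, padzero(0x0804a123) clears the
 * 0xedd bytes from 0x0804a123 up to the 0x0804b000 page boundary, so
 * the tail of the last file-backed page reads back as zeros instead of
 * whatever the file happened to contain past p_filesz.
 */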

/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
	elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
	old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
	(((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
#endif
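
/*
 * create_elf_tables() below arranges the new stack roughly as follows
 * (downward-growing stack; highest addresses first, sp ends up at argc):
 *
 *	[ argument / environment / platform strings                ]
 *	[ auxiliary vector: AT_* (id, value) pairs, AT_NULL last   ]
 *	[ NULL                                                     ]
 *	[ envp[0] .. envp[envc-1]   pointers into the strings      ]
 *	[ NULL                                                     ]
 *	[ argv[0] .. argv[argc-1]   pointers into the strings      ]
 * sp ->[ argc                                                     ]
 */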

static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
		unsigned long load_addr, unsigned long interp_load_addr)
{
	unsigned long p = bprm->p;
	int argc = bprm->argc;
	int envc = bprm->envc;
	elf_addr_t __user *argv;
	elf_addr_t __user *envp;
	elf_addr_t __user *sp;
	elf_addr_t __user *u_platform;
	const char *k_platform = ELF_PLATFORM;
	int items;
	elf_addr_t *elf_info;
	int ei_index = 0;
	struct task_struct *tsk = current;
	struct vm_area_struct *vma;

	/*
	 * In some cases (e.g. Hyper-Threading), we want to avoid L1
	 * evictions by the processes running on the same package. One
	 * thing we can do is to shuffle the initial stack for them.
	 */

	p = arch_align_stack(p);

	/*
	 * If this architecture has a platform capability string, copy it
	 * to userspace.  In some cases (Sparc), this info is impossible
	 * for userspace to get any other way, in others (i386) it is
	 * merely difficult.
	 */
	u_platform = NULL;
	if (k_platform) {
		size_t len = strlen(k_platform) + 1;

		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
		if (__copy_to_user(u_platform, k_platform, len))
			return -EFAULT;
	}

	/* Create the ELF interpreter info */
	elf_info = (elf_addr_t *)current->mm->saved_auxv;
	/* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
#define NEW_AUX_ENT(id, val) \
	do { \
		elf_info[ei_index++] = id; \
		elf_info[ei_index++] = val; \
	} while (0)

#ifdef ARCH_DLINFO
	/*
	 * ARCH_DLINFO must come first so PPC can do its special alignment of
	 * AUXV.
	 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
	 * ARCH_DLINFO changes
	 */
	ARCH_DLINFO;
#endif
	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
	NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
	NEW_AUX_ENT(AT_BASE, interp_load_addr);
	NEW_AUX_ENT(AT_FLAGS, 0);
	NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
	NEW_AUX_ENT(AT_UID, tsk->uid);
	NEW_AUX_ENT(AT_EUID, tsk->euid);
	NEW_AUX_ENT(AT_GID, tsk->gid);
	NEW_AUX_ENT(AT_EGID, tsk->egid);
	NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
	if (k_platform) {
		NEW_AUX_ENT(AT_PLATFORM,
			    (elf_addr_t)(unsigned long)u_platform);
	}
	if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
		NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
	}
#undef NEW_AUX_ENT
	/* AT_NULL is zero; clear the rest too */
	memset(&elf_info[ei_index], 0,
	       sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);

	/* And advance past the AT_NULL entry.  */
	ei_index += 2;

	sp = STACK_ADD(p, ei_index);

	items = (argc + 1) + (envc + 1) + 1;
	bprm->p = STACK_ROUND(sp, items);

	/* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
	bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
	sp = (elf_addr_t __user *)bprm->p;
#endif


	/*
	 * Grow the stack manually; some architectures have a limit on how
	 * far ahead a user-space access may be in order to grow the stack.
	 */
	vma = find_extend_vma(current->mm, bprm->p);
	if (!vma)
		return -EFAULT;

	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
	if (__put_user(argc, sp++))
		return -EFAULT;
	argv = sp;
	envp = argv + argc + 1;

	/* Populate argv and envp */
	p = current->mm->arg_end = current->mm->arg_start;
	while (argc-- > 0) {
		size_t len;
		if (__put_user((elf_addr_t)p, argv++))
			return -EFAULT;
		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
		if (!len || len > MAX_ARG_STRLEN)
			return -EINVAL;
		p += len;
	}
	if (__put_user(0, argv))
		return -EFAULT;
	current->mm->arg_end = current->mm->env_start = p;
	while (envc-- > 0) {
		size_t len;
		if (__put_user((elf_addr_t)p, envp++))
			return -EFAULT;
		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
		if (!len || len > MAX_ARG_STRLEN)
			return -EINVAL;
		p += len;
	}
	if (__put_user(0, envp))
		return -EFAULT;
	current->mm->env_end = p;

	/* Put the elf_info on the stack in the right place.  */
	sp = (elf_addr_t __user *)envp + 1;
	if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
		return -EFAULT;
	return 0;
}
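
/*
 * For reference, userspace (typically the dynamic linker) finds the
 * table built above by walking past envp; a minimal sketch of the
 * consumer side, using the glibc ElfW()/auxv_t naming:
 *
 *	char **p = envp;
 *	while (*p++)
 *		;
 *	for (ElfW(auxv_t) *av = (void *)p; av->a_type != AT_NULL; av++)
 *		if (av->a_type == AT_PHDR)
 *			phdr_addr = av->a_un.a_val;
 */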

#ifndef elf_map

static unsigned long elf_map(struct file *filep, unsigned long addr,
		struct elf_phdr *eppnt, int prot, int type,
		unsigned long total_size)
{
	unsigned long map_addr;
	unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
	unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
	addr = ELF_PAGESTART(addr);
	size = ELF_PAGEALIGN(size);

	/* mmap() will return -EINVAL if given a zero size, but a
	 * segment with zero filesize is perfectly valid */
	if (!size)
		return addr;

	down_write(&current->mm->mmap_sem);
	/*
	 * total_size is the size of the ELF (interpreter) image.
	 * The _first_ mmap needs to know the full size, otherwise
	 * randomization might put this image into an overlapping
	 * position with the ELF binary image. (since size < total_size)
	 * So we first map the 'big' image - and unmap the remainder at
	 * the end. (the unmap is needed for ELF images with holes.)
	 */
	if (total_size) {
		total_size = ELF_PAGEALIGN(total_size);
		map_addr = do_mmap(filep, addr, total_size, prot, type, off);
		if (!BAD_ADDR(map_addr))
			do_munmap(current->mm, map_addr+size, total_size-size);
	} else
		map_addr = do_mmap(filep, addr, size, prot, type, off);

	up_write(&current->mm->mmap_sem);
	return map_addr;
}

#endif /* !elf_map */
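
/*
 * Example: a PT_LOAD with p_vaddr == 0x08048100, p_offset == 0x100 and
 * p_filesz == 0x5000 (ELF_MIN_ALIGN 4096) gives
 * size == ELF_PAGEALIGN(0x5000 + 0x100) == 0x6000 and
 * off  == 0x100 - 0x100 == 0, so elf_map() mmaps 0x6000 bytes at
 * 0x08048000; p_vaddr and p_offset must share the same page offset
 * for the file contents to land at the right virtual address.
 */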

static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
{
	int i, first_idx = -1, last_idx = -1;

	for (i = 0; i < nr; i++) {
		if (cmds[i].p_type == PT_LOAD) {
			last_idx = i;
			if (first_idx == -1)
				first_idx = i;
		}
	}
	if (first_idx == -1)
		return 0;

	return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
				ELF_PAGESTART(cmds[first_idx].p_vaddr);
}
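
/*
 * Example: with PT_LOAD segments at p_vaddr 0x400000 (p_memsz 0x1000)
 * and p_vaddr 0x600e10 (p_memsz 0x500), this returns
 * 0x600e10 + 0x500 - 0x400000 == 0x201310, the span a single mmap must
 * reserve so that both segments fit in one contiguous region.
 */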


/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header */

static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
		struct file *interpreter, unsigned long *interp_map_addr,
		unsigned long no_base)
{
	struct elf_phdr *elf_phdata;
	struct elf_phdr *eppnt;
	unsigned long load_addr = 0;
	int load_addr_set = 0;
	unsigned long last_bss = 0, elf_bss = 0;
	unsigned long error = ~0UL;
	unsigned long total_size;
	int retval, i, size;

	/* First of all, some simple consistency checks */
	if (interp_elf_ex->e_type != ET_EXEC &&
	    interp_elf_ex->e_type != ET_DYN)
		goto out;
	if (!elf_check_arch(interp_elf_ex))
		goto out;
	if (!interpreter->f_op || !interpreter->f_op->mmap)
		goto out;

	/*
	 * If the size of this structure has changed, then punt, since
	 * we will be doing the wrong thing.
	 */
	if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
		goto out;
	if (interp_elf_ex->e_phnum < 1 ||
		interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
		goto out;

	/* Now read in all of the header information */
	size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
	if (size > ELF_MIN_ALIGN)
		goto out;
	elf_phdata = kmalloc(size, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
			     (char *)elf_phdata, size);
	error = -EIO;
	if (retval != size) {
		if (retval < 0)
			error = retval;
		goto out_close;
	}

	total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
	if (!total_size) {
		error = -EINVAL;
		goto out_close;
	}

	eppnt = elf_phdata;
	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
		if (eppnt->p_type == PT_LOAD) {
			int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
			int elf_prot = 0;
			unsigned long vaddr = 0;
			unsigned long k, map_addr;

			if (eppnt->p_flags & PF_R)
				elf_prot = PROT_READ;
			if (eppnt->p_flags & PF_W)
				elf_prot |= PROT_WRITE;
			if (eppnt->p_flags & PF_X)
				elf_prot |= PROT_EXEC;
			vaddr = eppnt->p_vaddr;
			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
				elf_type |= MAP_FIXED;
			else if (no_base && interp_elf_ex->e_type == ET_DYN)
				load_addr = -vaddr;

			map_addr = elf_map(interpreter, load_addr + vaddr,
					eppnt, elf_prot, elf_type, total_size);
			total_size = 0;
			if (!*interp_map_addr)
				*interp_map_addr = map_addr;
			error = map_addr;
			if (BAD_ADDR(map_addr))
				goto out_close;

			if (!load_addr_set &&
			    interp_elf_ex->e_type == ET_DYN) {
				load_addr = map_addr - ELF_PAGESTART(vaddr);
				load_addr_set = 1;
			}

			/*
			 * Check to see if the section's size will overflow the
			 * allowed task size. Note that p_filesz must always be
			 * <= p_memsz so it's only necessary to check p_memsz.
			 */
			k = load_addr + eppnt->p_vaddr;
			if (BAD_ADDR(k) ||
			    eppnt->p_filesz > eppnt->p_memsz ||
			    eppnt->p_memsz > TASK_SIZE ||
			    TASK_SIZE - eppnt->p_memsz < k) {
				error = -ENOMEM;
				goto out_close;
			}

			/*
			 * Find the end of the file mapping for this phdr, and
			 * keep track of the largest address we see for this.
			 */
			k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
			if (k > elf_bss)
				elf_bss = k;

			/*
			 * Do the same thing for the memory mapping - between
			 * elf_bss and last_bss is the bss section.
			 */
			k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
			if (k > last_bss)
				last_bss = k;
		}
	}

	/*
	 * Now fill out the bss section.  First pad the last page up
	 * to the page boundary, and then perform a mmap to make sure
	 * that there are zero-mapped pages up to and including the
	 * last bss page.
	 */
	if (padzero(elf_bss)) {
		error = -EFAULT;
		goto out_close;
	}

	/* What we have mapped so far */
	elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);

	/* Map the last of the bss segment */
	if (last_bss > elf_bss) {
		down_write(&current->mm->mmap_sem);
		error = do_brk(elf_bss, last_bss - elf_bss);
		up_write(&current->mm->mmap_sem);
		if (BAD_ADDR(error))
			goto out_close;
	}

	error = load_addr;

out_close:
	kfree(elf_phdata);
out:
	return error;
}

/*
 * These are the functions used to load ELF style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
 */

#define INTERPRETER_NONE 0
#define INTERPRETER_ELF 2

#ifndef STACK_RND_MASK
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))	/* 8MB of VA */
#endif

static unsigned long randomize_stack_top(unsigned long stack_top)
{
	unsigned int random_variable = 0;

	if ((current->flags & PF_RANDOMIZE) &&
		!(current->personality & ADDR_NO_RANDOMIZE)) {
		random_variable = get_random_int() & STACK_RND_MASK;
		random_variable <<= PAGE_SHIFT;
	}
#ifdef CONFIG_STACK_GROWSUP
	return PAGE_ALIGN(stack_top) + random_variable;
#else
	return PAGE_ALIGN(stack_top) - random_variable;
#endif
}
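
/*
 * Example: with the default STACK_RND_MASK of 0x7ff and PAGE_SHIFT 12,
 * random_variable takes one of 0x800 page-aligned values, so the stack
 * top is shifted by up to 0x7ff000 bytes (just under 8MB) from the
 * page-aligned STACK_TOP.
 */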

static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
{
	struct file *interpreter = NULL; /* to shut gcc up */
	unsigned long load_addr = 0, load_bias = 0;
	int load_addr_set = 0;
	char *elf_interpreter = NULL;
	unsigned long error;
	struct elf_phdr *elf_ppnt, *elf_phdata;
	unsigned long elf_bss, elf_brk;
	int elf_exec_fileno;
	int retval, i;
	unsigned int size;
	unsigned long elf_entry;
	unsigned long interp_load_addr = 0;
	unsigned long start_code, end_code, start_data, end_data;
	unsigned long reloc_func_desc = 0;
	struct files_struct *files;
	int executable_stack = EXSTACK_DEFAULT;
	unsigned long def_flags = 0;
	struct {
		struct elfhdr elf_ex;
		struct elfhdr interp_elf_ex;
		struct exec interp_ex;
	} *loc;

	loc = kmalloc(sizeof(*loc), GFP_KERNEL);
	if (!loc) {
		retval = -ENOMEM;
		goto out_ret;
	}

	/* Get the exec-header */
	loc->elf_ex = *((struct elfhdr *)bprm->buf);

	retval = -ENOEXEC;
	/* First of all, some simple consistency checks */
	if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
		goto out;

	if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
		goto out;
	if (!elf_check_arch(&loc->elf_ex))
		goto out;
	if (!bprm->file->f_op || !bprm->file->f_op->mmap)
		goto out;

	/* Now read in all of the header information */
	if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
		goto out;
	if (loc->elf_ex.e_phnum < 1 ||
		loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
		goto out;
	size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
	retval = -ENOMEM;
	elf_phdata = kmalloc(size, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
			     (char *)elf_phdata, size);
	if (retval != size) {
		if (retval >= 0)
			retval = -EIO;
		goto out_free_ph;
	}

	files = current->files;	/* Refcounted so ok */
	retval = unshare_files();
	if (retval < 0)
		goto out_free_ph;
	if (files == current->files) {
		put_files_struct(files);
		files = NULL;
	}

	/* exec will make our files private anyway, but for the a.out
	   loader stuff we need to do it earlier */
	retval = get_unused_fd();
	if (retval < 0)
		goto out_free_fh;
	get_file(bprm->file);
	fd_install(elf_exec_fileno = retval, bprm->file);

	elf_ppnt = elf_phdata;
	elf_bss = 0;
	elf_brk = 0;

	start_code = ~0UL;
	end_code = 0;
	start_data = 0;
	end_data = 0;

	for (i = 0; i < loc->elf_ex.e_phnum; i++) {
		if (elf_ppnt->p_type == PT_INTERP) {
			/* This is the program interpreter used for
			 * shared libraries - for now assume that this
			 * is an a.out format binary
			 */
			retval = -ENOEXEC;
			if (elf_ppnt->p_filesz > PATH_MAX ||
			    elf_ppnt->p_filesz < 2)
				goto out_free_file;

			retval = -ENOMEM;
			elf_interpreter = kmalloc(elf_ppnt->p_filesz,
						  GFP_KERNEL);
			if (!elf_interpreter)
				goto out_free_file;

			retval = kernel_read(bprm->file, elf_ppnt->p_offset,
					     elf_interpreter,
					     elf_ppnt->p_filesz);
			if (retval != elf_ppnt->p_filesz) {
				if (retval >= 0)
					retval = -EIO;
				goto out_free_interp;
			}
			/* make sure path is NULL terminated */
			retval = -ENOEXEC;
			if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
				goto out_free_interp;

			/*
			 * The early SET_PERSONALITY here is so that the lookup
			 * for the interpreter happens in the namespace of the
			 * to-be-execed image.  SET_PERSONALITY can select an
			 * alternate root.
			 *
			 * However, SET_PERSONALITY is NOT allowed to switch
			 * this task into the new image's memory mapping
			 * policy - that is, TASK_SIZE must still evaluate to
			 * that which is appropriate to the execing application.
			 * This is because exit_mmap() needs to have TASK_SIZE
			 * evaluate to the size of the old image.
			 *
			 * So if (say) a 64-bit application is execing a 32-bit
			 * application it is the architecture's responsibility
			 * to defer changing the value of TASK_SIZE until the
			 * switch really is going to happen - do this in
			 * flush_thread().	- akpm
			 */
			SET_PERSONALITY(loc->elf_ex, 0);

			interpreter = open_exec(elf_interpreter);
			retval = PTR_ERR(interpreter);
			if (IS_ERR(interpreter))
				goto out_free_interp;

			/*
			 * If the binary is not readable then enforce
			 * mm->dumpable = 0 regardless of the interpreter's
			 * permissions.
			 */
			if (file_permission(interpreter, MAY_READ) < 0)
				bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;

			retval = kernel_read(interpreter, 0, bprm->buf,
					     BINPRM_BUF_SIZE);
			if (retval != BINPRM_BUF_SIZE) {
				if (retval >= 0)
					retval = -EIO;
				goto out_free_dentry;
			}

			/* Get the exec headers */
			loc->interp_ex = *((struct exec *)bprm->buf);
			loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
			break;
		}
		elf_ppnt++;
	}

	elf_ppnt = elf_phdata;
	for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
		if (elf_ppnt->p_type == PT_GNU_STACK) {
			if (elf_ppnt->p_flags & PF_X)
				executable_stack = EXSTACK_ENABLE_X;
			else
				executable_stack = EXSTACK_DISABLE_X;
			break;
		}

	/* Some simple consistency checks for the interpreter */
	if (elf_interpreter) {
		retval = -ELIBBAD;
		/* Not an ELF interpreter */
		if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
			goto out_free_dentry;
		/* Verify the interpreter has a valid arch */
		if (!elf_check_arch(&loc->interp_elf_ex))
			goto out_free_dentry;
	} else {
		/* Executables without an interpreter also need a personality */
		SET_PERSONALITY(loc->elf_ex, 0);
	}

	/* Flush all traces of the currently running executable */
	retval = flush_old_exec(bprm);
	if (retval)
		goto out_free_dentry;

	/* Discard our unneeded old files struct */
	if (files) {
		put_files_struct(files);
		files = NULL;
	}

	/* OK, This is the point of no return */
	current->flags &= ~PF_FORKNOEXEC;
	current->mm->def_flags = def_flags;

	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
	   may depend on the personality.  */
	SET_PERSONALITY(loc->elf_ex, 0);
	if (elf_read_implies_exec(loc->elf_ex, executable_stack))
		current->personality |= READ_IMPLIES_EXEC;

	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
		current->flags |= PF_RANDOMIZE;
	arch_pick_mmap_layout(current->mm);

	/* Do this so that we can load the interpreter, if need be.  We will
	   change some of these later */
	current->mm->free_area_cache = current->mm->mmap_base;
	current->mm->cached_hole_size = 0;
	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
				 executable_stack);
	if (retval < 0) {
		send_sig(SIGKILL, current, 0);
		goto out_free_dentry;
	}

	current->mm->start_stack = bprm->p;

	/* Now we do a little grungy work by mmaping the ELF image into
	   the correct location in memory. */
	for (i = 0, elf_ppnt = elf_phdata;
	    i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
		int elf_prot = 0, elf_flags;
		unsigned long k, vaddr;

		if (elf_ppnt->p_type != PT_LOAD)
			continue;

		if (unlikely(elf_brk > elf_bss)) {
			unsigned long nbyte;

			/* There was a PT_LOAD segment with p_memsz > p_filesz
			   before this one. Map anonymous pages, if needed,
			   and clear the area.  */
			retval = set_brk(elf_bss + load_bias,
					 elf_brk + load_bias);
			if (retval) {
				send_sig(SIGKILL, current, 0);
				goto out_free_dentry;
			}
			nbyte = ELF_PAGEOFFSET(elf_bss);
			if (nbyte) {
				nbyte = ELF_MIN_ALIGN - nbyte;
				if (nbyte > elf_brk - elf_bss)
					nbyte = elf_brk - elf_bss;
				if (clear_user((void __user *)elf_bss +
							load_bias, nbyte)) {
					/*
					 * This bss-zeroing can fail if the ELF
					 * file specifies odd protections. So
					 * we don't check the return value
					 */
				}
			}
		}

		if (elf_ppnt->p_flags & PF_R)
			elf_prot |= PROT_READ;
		if (elf_ppnt->p_flags & PF_W)
			elf_prot |= PROT_WRITE;
		if (elf_ppnt->p_flags & PF_X)
			elf_prot |= PROT_EXEC;

		elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;

		vaddr = elf_ppnt->p_vaddr;
		if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
			elf_flags |= MAP_FIXED;
		} else if (loc->elf_ex.e_type == ET_DYN) {
			/* Try and get dynamic programs out of the way of the
			 * default mmap base, as well as whatever program they
			 * might try to exec.  This is because the brk will
			 * follow the loader, and is not movable.  */
#ifdef CONFIG_X86
			load_bias = 0;
#else
			load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#endif
		}

		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
				elf_prot, elf_flags, 0);
		if (BAD_ADDR(error)) {
			send_sig(SIGKILL, current, 0);
			retval = IS_ERR((void *)error) ?
				PTR_ERR((void *)error) : -EINVAL;
			goto out_free_dentry;
		}

		if (!load_addr_set) {
			load_addr_set = 1;
			load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
			if (loc->elf_ex.e_type == ET_DYN) {
				load_bias += error -
				             ELF_PAGESTART(load_bias + vaddr);
				load_addr += load_bias;
				reloc_func_desc = load_bias;
			}
		}
		k = elf_ppnt->p_vaddr;
		if (k < start_code)
			start_code = k;
		if (start_data < k)
			start_data = k;

		/*
		 * Check to see if the section's size will overflow the
		 * allowed task size. Note that p_filesz must always be
		 * <= p_memsz so it is only necessary to check p_memsz.
		 */
		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
		    elf_ppnt->p_memsz > TASK_SIZE ||
		    TASK_SIZE - elf_ppnt->p_memsz < k) {
			/* set_brk can never work. Avoid overflows. */
			send_sig(SIGKILL, current, 0);
			retval = -EINVAL;
			goto out_free_dentry;
		}

		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

		if (k > elf_bss)
			elf_bss = k;
		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
			end_code = k;
		if (end_data < k)
			end_data = k;
		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
		if (k > elf_brk)
			elf_brk = k;
	}

	loc->elf_ex.e_entry += load_bias;
	elf_bss += load_bias;
	elf_brk += load_bias;
	start_code += load_bias;
	end_code += load_bias;
	start_data += load_bias;
	end_data += load_bias;

	/* Calling set_brk effectively mmaps the pages that we need
	 * for the bss and break sections.  We must do this before
	 * mapping in the interpreter, to make sure it doesn't wind
	 * up getting placed where the bss needs to go.
	 */
	retval = set_brk(elf_bss, elf_brk);
	if (retval) {
		send_sig(SIGKILL, current, 0);
		goto out_free_dentry;
	}
	if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
		send_sig(SIGSEGV, current, 0);
		retval = -EFAULT; /* Nobody gets to see this, but.. */
		goto out_free_dentry;
	}

	if (elf_interpreter) {
		unsigned long uninitialized_var(interp_map_addr);

		elf_entry = load_elf_interp(&loc->interp_elf_ex,
					    interpreter,
					    &interp_map_addr,
					    load_bias);
		if (!IS_ERR((void *)elf_entry)) {
			/*
			 * load_elf_interp() returns relocation
			 * adjustment
			 */
			interp_load_addr = elf_entry;
			elf_entry += loc->interp_elf_ex.e_entry;
		}
		if (BAD_ADDR(elf_entry)) {
			force_sig(SIGSEGV, current);
			retval = IS_ERR((void *)elf_entry) ?
					(int)elf_entry : -EINVAL;
			goto out_free_dentry;
		}
		reloc_func_desc = interp_load_addr;

		allow_write_access(interpreter);
		fput(interpreter);
		kfree(elf_interpreter);
	} else {
		elf_entry = loc->elf_ex.e_entry;
		if (BAD_ADDR(elf_entry)) {
			force_sig(SIGSEGV, current);
			retval = -EINVAL;
			goto out_free_dentry;
		}
	}

	kfree(elf_phdata);

	sys_close(elf_exec_fileno);

	set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
	retval = arch_setup_additional_pages(bprm, executable_stack);
	if (retval < 0) {
		send_sig(SIGKILL, current, 0);
		goto out;
	}
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

	compute_creds(bprm);
	current->flags &= ~PF_FORKNOEXEC;
	retval = create_elf_tables(bprm, &loc->elf_ex,
			  load_addr, interp_load_addr);
	if (retval < 0) {
		send_sig(SIGKILL, current, 0);
		goto out;
	}
	/* N.B. passed_fileno might not be initialized? */
	current->mm->end_code = end_code;
	current->mm->start_code = start_code;
	current->mm->start_data = start_data;
	current->mm->end_data = end_data;
	current->mm->start_stack = bprm->p;

#ifdef arch_randomize_brk
	if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1))
		current->mm->brk = current->mm->start_brk =
			arch_randomize_brk(current->mm);
#endif

	if (current->personality & MMAP_PAGE_ZERO) {
		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
		   and some applications "depend" upon this behavior.
		   Since we do not have the power to recompile these, we
		   emulate the SVr4 behavior. Sigh. */
		down_write(&current->mm->mmap_sem);
		error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
				MAP_FIXED | MAP_PRIVATE, 0);
		up_write(&current->mm->mmap_sem);
	}

#ifdef ELF_PLAT_INIT
	/*
	 * The ABI may specify that certain registers be set up in special
	 * ways (on i386 %edx is the address of a DT_FINI function, for
	 * example).  In addition, it may also specify (eg, PowerPC64 ELF)
	 * that the e_entry field is the address of the function descriptor
	 * for the startup routine, rather than the address of the startup
	 * routine itself.  This macro performs whatever initialization to
	 * the regs structure is required as well as any relocations to the
	 * function descriptor entries when executing dynamically linked apps.
	 */
	ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

	start_thread(regs, elf_entry, bprm->p);
	if (unlikely(current->ptrace & PT_PTRACED)) {
		if (current->ptrace & PT_TRACE_EXEC)
			ptrace_notify((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
		else
			send_sig(SIGTRAP, current, 0);
	}
	retval = 0;
out:
	kfree(loc);
out_ret:
	return retval;

	/* error cleanup */
out_free_dentry:
	allow_write_access(interpreter);
	if (interpreter)
		fput(interpreter);
out_free_interp:
	kfree(elf_interpreter);
out_free_file:
	sys_close(elf_exec_fileno);
out_free_fh:
	if (files)
		reset_files_struct(current, files);
out_free_ph:
	kfree(elf_phdata);
	goto out;
}

/* This is really simpleminded and specialized - we are loading an
   a.out library that is given an ELF header. */
static int load_elf_library(struct file *file)
{
	struct elf_phdr *elf_phdata;
	struct elf_phdr *eppnt;
	unsigned long elf_bss, bss, len;
	int retval, error, i, j;
	struct elfhdr elf_ex;

	error = -ENOEXEC;
	retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
	if (retval != sizeof(elf_ex))
		goto out;

	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
		goto out;

	/* First of all, some simple consistency checks */
	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
	    !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
		goto out;

	/* Now read in all of the header information */

	j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
	/* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

	error = -ENOMEM;
	elf_phdata = kmalloc(j, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	eppnt = elf_phdata;
	error = -ENOEXEC;
	retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
	if (retval != j)
		goto out_free_ph;

	for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
		if ((eppnt + i)->p_type == PT_LOAD)
			j++;
	if (j != 1)
		goto out_free_ph;

	while (eppnt->p_type != PT_LOAD)
		eppnt++;

	/* Now use mmap to map the library into memory. */
	down_write(&current->mm->mmap_sem);
	error = do_mmap(file,
			ELF_PAGESTART(eppnt->p_vaddr),
			(eppnt->p_filesz +
			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
			PROT_READ | PROT_WRITE | PROT_EXEC,
			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
			(eppnt->p_offset -
			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
	up_write(&current->mm->mmap_sem);
	if (error != ELF_PAGESTART(eppnt->p_vaddr))
		goto out_free_ph;

	elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
	if (padzero(elf_bss)) {
		error = -EFAULT;
		goto out_free_ph;
	}

	len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
			    ELF_MIN_ALIGN - 1);
	bss = eppnt->p_memsz + eppnt->p_vaddr;
	if (bss > len) {
		down_write(&current->mm->mmap_sem);
		do_brk(len, bss - len);
		up_write(&current->mm->mmap_sem);
	}
	error = 0;

out_free_ph:
	kfree(elf_phdata);
out:
	return error;
}

/*
 * Note that some platforms still use traditional core dumps and not
 * the ELF core dump.  Each platform can select it as appropriate.
 */
#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)

/*
 * ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */
/*
 * These are the only things you should do on a core-file: use only these
 * functions to write out all the necessary info.
 */
static int dump_write(struct file *file, const void *addr, int nr)
{
	return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
}

static int dump_seek(struct file *file, loff_t off)
{
	if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
		if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
			return 0;
	} else {
		char *buf = (char *)get_zeroed_page(GFP_KERNEL);
		if (!buf)
			return 0;
		while (off > 0) {
			unsigned long n = off;
			if (n > PAGE_SIZE)
				n = PAGE_SIZE;
			if (!dump_write(file, buf, n)) {
				free_page((unsigned long)buf);
				return 0;
			}
			off -= n;
		}
		free_page((unsigned long)buf);
	}
	return 1;
}

/*
 * Decide what to dump of a segment, part, all or none.
 */
static unsigned long vma_dump_size(struct vm_area_struct *vma,
				   unsigned long mm_flags)
{
	/* The vma can be set up to tell us the answer directly.  */
	if (vma->vm_flags & VM_ALWAYSDUMP)
		goto whole;

	/* Do not dump I/O mapped devices or special mappings */
	if (vma->vm_flags & (VM_IO | VM_RESERVED))
		return 0;

#define FILTER(type)	(mm_flags & (1UL << MMF_DUMP_##type))

	/* By default, dump shared memory if mapped from an anonymous file. */
	if (vma->vm_flags & VM_SHARED) {
		if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ?
		    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
			goto whole;
		return 0;
	}

	/* Dump segments that have been written to.  */
	if (vma->anon_vma && FILTER(ANON_PRIVATE))
		goto whole;
	if (vma->vm_file == NULL)
		return 0;

	if (FILTER(MAPPED_PRIVATE))
		goto whole;

	/*
	 * If this looks like the beginning of a DSO or executable mapping,
	 * check for an ELF header.  If we find one, dump the first page to
	 * aid in determining what was mapped here.
	 */
	if (FILTER(ELF_HEADERS) && vma->vm_file != NULL && vma->vm_pgoff == 0) {
		u32 __user *header = (u32 __user *) vma->vm_start;
		u32 word;
		/*
		 * Doing it this way gets the constant folded by GCC.
		 */
		union {
			u32 cmp;
			char elfmag[SELFMAG];
		} magic;
		BUILD_BUG_ON(SELFMAG != sizeof word);
		magic.elfmag[EI_MAG0] = ELFMAG0;
		magic.elfmag[EI_MAG1] = ELFMAG1;
		magic.elfmag[EI_MAG2] = ELFMAG2;
		magic.elfmag[EI_MAG3] = ELFMAG3;
		if (get_user(word, header) == 0 && word == magic.cmp)
			return PAGE_SIZE;
	}

#undef	FILTER

	return 0;

whole:
	return vma->vm_end - vma->vm_start;
}

/* An ELF note in memory */
struct memelfnote
{
	const char *name;
	int type;
	unsigned int datasz;
	void *data;
};

static int notesize(struct memelfnote *en)
{
	int sz;

	sz = sizeof(struct elf_note);
	sz += roundup(strlen(en->name) + 1, 4);
	sz += roundup(en->datasz, 4);

	return sz;
}
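
/*
 * Example: a "CORE"/NT_PRSTATUS note whose payload is, say, a 336-byte
 * struct elf_prstatus (the size is arch-dependent) occupies
 * sizeof(struct elf_note) == 12, plus roundup(5, 4) == 8 for "CORE\0",
 * plus roundup(336, 4) == 336 for the payload: 356 bytes in the file.
 */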

#define DUMP_WRITE(addr, nr, foffset)	\
	do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)

static int alignfile(struct file *file, loff_t *foffset)
{
	static const char buf[4] = { 0, };
	DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
	return 1;
}

static int writenote(struct memelfnote *men, struct file *file,
			loff_t *foffset)
{
	struct elf_note en;
	en.n_namesz = strlen(men->name) + 1;
	en.n_descsz = men->datasz;
	en.n_type = men->type;

	DUMP_WRITE(&en, sizeof(en), foffset);
	DUMP_WRITE(men->name, en.n_namesz, foffset);
	if (!alignfile(file, foffset))
		return 0;
	DUMP_WRITE(men->data, men->datasz, foffset);
	if (!alignfile(file, foffset))
		return 0;

	return 1;
}
#undef DUMP_WRITE

#define DUMP_WRITE(addr, nr)	\
	do { \
		if ((size += (nr)) > limit || \
		    !dump_write(file, (addr), (nr))) \
			goto end_coredump; \
	} while (0)
#define DUMP_SEEK(off)	\
	do { \
		if (!dump_seek(file, (off))) \
			goto end_coredump; \
	} while (0)

static void fill_elf_header(struct elfhdr *elf, int segs,
			    u16 machine, u32 flags, u8 osabi)
{
	memcpy(elf->e_ident, ELFMAG, SELFMAG);
	elf->e_ident[EI_CLASS] = ELF_CLASS;
	elf->e_ident[EI_DATA] = ELF_DATA;
	elf->e_ident[EI_VERSION] = EV_CURRENT;
	elf->e_ident[EI_OSABI] = ELF_OSABI;
	memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);

	elf->e_type = ET_CORE;
	elf->e_machine = machine;
	elf->e_version = EV_CURRENT;
	elf->e_entry = 0;
	elf->e_phoff = sizeof(struct elfhdr);
	elf->e_shoff = 0;
	elf->e_flags = flags;
	elf->e_ehsize = sizeof(struct elfhdr);
	elf->e_phentsize = sizeof(struct elf_phdr);
	elf->e_phnum = segs;
	elf->e_shentsize = 0;
	elf->e_shnum = 0;
	elf->e_shstrndx = 0;
	return;
}

static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
{
	phdr->p_type = PT_NOTE;
	phdr->p_offset = offset;
	phdr->p_vaddr = 0;
	phdr->p_paddr = 0;
	phdr->p_filesz = sz;
	phdr->p_memsz = 0;
	phdr->p_flags = 0;
	phdr->p_align = 0;
	return;
}

static void fill_note(struct memelfnote *note, const char *name, int type,
		unsigned int sz, void *data)
{
	note->name = name;
	note->type = type;
	note->datasz = sz;
	note->data = data;
	return;
}

/*
 * fill up all the fields in prstatus from the given task struct, except
 * registers which need to be filled up separately.
 */
static void fill_prstatus(struct elf_prstatus *prstatus,
		struct task_struct *p, long signr)
{
	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
	prstatus->pr_sigpend = p->pending.signal.sig[0];
	prstatus->pr_sighold = p->blocked.sig[0];
	prstatus->pr_pid = task_pid_vnr(p);
	prstatus->pr_ppid = task_pid_vnr(p->real_parent);
	prstatus->pr_pgrp = task_pgrp_vnr(p);
	prstatus->pr_sid = task_session_vnr(p);
	if (thread_group_leader(p)) {
		/*
		 * This is the record for the group leader.  Add in the
		 * cumulative times of previous dead threads.  This total
		 * won't include the time of each live thread whose state
		 * is included in the core dump.  The final total reported
		 * to our parent process when it calls wait4 will include
		 * those sums as well as the little bit more time it takes
		 * this and each other thread to finish dying after the
		 * core dump synchronization phase.
		 */
		cputime_to_timeval(cputime_add(p->utime, p->signal->utime),
				   &prstatus->pr_utime);
		cputime_to_timeval(cputime_add(p->stime, p->signal->stime),
				   &prstatus->pr_stime);
	} else {
		cputime_to_timeval(p->utime, &prstatus->pr_utime);
		cputime_to_timeval(p->stime, &prstatus->pr_stime);
	}
	cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
	cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
}

static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
		       struct mm_struct *mm)
{
	unsigned int i, len;

	/* first copy the parameters from user space */
	memset(psinfo, 0, sizeof(struct elf_prpsinfo));

	len = mm->arg_end - mm->arg_start;
	if (len >= ELF_PRARGSZ)
		len = ELF_PRARGSZ - 1;
	if (copy_from_user(&psinfo->pr_psargs,
			   (const char __user *)mm->arg_start, len))
		return -EFAULT;
	for (i = 0; i < len; i++)
		if (psinfo->pr_psargs[i] == 0)
			psinfo->pr_psargs[i] = ' ';
	psinfo->pr_psargs[len] = 0;

	psinfo->pr_pid = task_pid_vnr(p);
	psinfo->pr_ppid = task_pid_vnr(p->real_parent);
	psinfo->pr_pgrp = task_pgrp_vnr(p);
	psinfo->pr_sid = task_session_vnr(p);

	i = p->state ? ffz(~p->state) + 1 : 0;
	psinfo->pr_state = i;
	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
	psinfo->pr_nice = task_nice(p);
	psinfo->pr_flag = p->flags;
	SET_UID(psinfo->pr_uid, p->uid);
	SET_GID(psinfo->pr_gid, p->gid);
	strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));

	return 0;
}

static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
{
	elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
	int i = 0;
	do
		i += 2;
	while (auxv[i - 2] != AT_NULL);
	fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
}

#ifdef CORE_DUMP_USE_REGSET
#include <linux/regset.h>

struct elf_thread_core_info {
	struct elf_thread_core_info *next;
	struct task_struct *task;
	struct elf_prstatus prstatus;
	struct memelfnote notes[0];
};

struct elf_note_info {
	struct elf_thread_core_info *thread;
	struct memelfnote psinfo;
	struct memelfnote auxv;
	size_t size;
	int thread_notes;
};

static int fill_thread_core_info(struct elf_thread_core_info *t,
				 const struct user_regset_view *view,
				 long signr, size_t *total)
{
	unsigned int i;

	/*
	 * NT_PRSTATUS is the one special case, because the regset data
	 * goes into the pr_reg field inside the note contents, rather
	 * than being the whole note contents.  We fill the rest in here.
	 * We assume that regset 0 is NT_PRSTATUS.
	 */
	fill_prstatus(&t->prstatus, t->task, signr);
	(void) view->regsets[0].get(t->task, &view->regsets[0],
				    0, sizeof(t->prstatus.pr_reg),
				    &t->prstatus.pr_reg, NULL);

	fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
		  sizeof(t->prstatus), &t->prstatus);
	*total += notesize(&t->notes[0]);

	/*
	 * Each other regset might generate a note too.  For each regset
	 * that has no core_note_type or is inactive, we leave t->notes[i]
	 * all zero and we'll know to skip writing it later.
	 */
	for (i = 1; i < view->n; ++i) {
		const struct user_regset *regset = &view->regsets[i];
		if (regset->core_note_type &&
		    (!regset->active || regset->active(t->task, regset))) {
			int ret;
			size_t size = regset->n * regset->size;
			void *data = kmalloc(size, GFP_KERNEL);
			if (unlikely(!data))
				return 0;
			ret = regset->get(t->task, regset,
					  0, size, data, NULL);
			if (unlikely(ret))
				kfree(data);
			else {
				if (regset->core_note_type != NT_PRFPREG)
					fill_note(&t->notes[i], "LINUX",
						  regset->core_note_type,
						  size, data);
				else {
					t->prstatus.pr_fpvalid = 1;
					fill_note(&t->notes[i], "CORE",
						  NT_PRFPREG, size, data);
				}
				*total += notesize(&t->notes[i]);
			}
		}
	}

	return 1;
}

static int fill_note_info(struct elfhdr *elf, int phdrs,
			  struct elf_note_info *info,
			  long signr, struct pt_regs *regs)
{
	struct task_struct *dump_task = current;
	const struct user_regset_view *view = task_user_regset_view(dump_task);
	struct elf_thread_core_info *t;
	struct elf_prpsinfo *psinfo;
	struct task_struct *g, *p;
	unsigned int i;

	info->size = 0;
	info->thread = NULL;

	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
	fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);

	if (psinfo == NULL)
		return 0;

	/*
	 * Figure out how many notes we're going to need for each thread.
	 */
	info->thread_notes = 0;
	for (i = 0; i < view->n; ++i)
		if (view->regsets[i].core_note_type != 0)
			++info->thread_notes;

	/*
	 * Sanity check.  We rely on regset 0 being NT_PRSTATUS,
	 * since it is our one special case.
	 */
	if (unlikely(info->thread_notes == 0) ||
	    unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
		WARN_ON(1);
		return 0;
	}

	/*
	 * Initialize the ELF file header.
	 */
	fill_elf_header(elf, phdrs,
			view->e_machine, view->e_flags, view->ei_osabi);

	/*
	 * Allocate a structure for each thread.
	 */
	rcu_read_lock();
	do_each_thread(g, p)
		if (p->mm == dump_task->mm) {
			t = kzalloc(offsetof(struct elf_thread_core_info,
					     notes[info->thread_notes]),
				    GFP_ATOMIC);
			if (unlikely(!t)) {
				rcu_read_unlock();
				return 0;
			}
			t->task = p;
			if (p == dump_task || !info->thread) {
				t->next = info->thread;
				info->thread = t;
			} else {
				/*
				 * Make sure to keep the original task at
				 * the head of the list.
				 */
				t->next = info->thread->next;
				info->thread->next = t;
			}
		}
	while_each_thread(g, p);
	rcu_read_unlock();

	/*
	 * Now fill in each thread's information.
	 */
	for (t = info->thread; t != NULL; t = t->next)
		if (!fill_thread_core_info(t, view, signr, &info->size))
			return 0;

	/*
	 * Fill in the two process-wide notes.
	 */
	fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
	info->size += notesize(&info->psinfo);

	fill_auxv_note(&info->auxv, current->mm);
	info->size += notesize(&info->auxv);

	return 1;
}

static size_t get_note_info_size(struct elf_note_info *info)
{
	return info->size;
}

/*
 * Write all the notes for each thread.  When writing the first thread, the
 * process-wide notes are interleaved after the first thread-specific note.
 */
static int write_note_info(struct elf_note_info *info,
			   struct file *file, loff_t *foffset)
{
	bool first = true;
	struct elf_thread_core_info *t = info->thread;

	do {
		int i;

		if (!writenote(&t->notes[0], file, foffset))
			return 0;

		if (first && !writenote(&info->psinfo, file, foffset))
			return 0;
		if (first && !writenote(&info->auxv, file, foffset))
			return 0;

		for (i = 1; i < info->thread_notes; ++i)
			if (t->notes[i].data &&
			    !writenote(&t->notes[i], file, foffset))
				return 0;

		first = false;
		t = t->next;
	} while (t);

	return 1;
}

static void free_note_info(struct elf_note_info *info)
{
	struct elf_thread_core_info *threads = info->thread;
	while (threads) {
		unsigned int i;
		struct elf_thread_core_info *t = threads;
		threads = t->next;
		WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
		for (i = 1; i < info->thread_notes; ++i)
			kfree(t->notes[i].data);
		kfree(t);
	}
	kfree(info->psinfo.data);
}

#else

/* Here is the structure in which status of each thread is captured. */
struct elf_thread_status
{
	struct list_head list;
	struct elf_prstatus prstatus;	/* NT_PRSTATUS */
	elf_fpregset_t fpu;		/* NT_PRFPREG */
	struct task_struct *thread;
#ifdef ELF_CORE_COPY_XFPREGS
	elf_fpxregset_t xfpu;		/* ELF_CORE_XFPREG_TYPE */
#endif
	struct memelfnote notes[3];
	int num_notes;
};

/*
 * In order to add the specific thread information for the elf file format,
 * we need to keep a linked list of every thread's pr_status and then create
 * a single section for them in the final core file.
 */
static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
{
	int sz = 0;
	struct task_struct *p = t->thread;
	t->num_notes = 0;

	fill_prstatus(&t->prstatus, p, signr);
	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);

	fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
		  &(t->prstatus));
	t->num_notes++;
	sz += notesize(&t->notes[0]);

	if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
								&t->fpu))) {
		fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
			  &(t->fpu));
		t->num_notes++;
		sz += notesize(&t->notes[1]);
	}

#ifdef ELF_CORE_COPY_XFPREGS
	if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
		fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
			  sizeof(t->xfpu), &t->xfpu);
		t->num_notes++;
		sz += notesize(&t->notes[2]);
	}
#endif
	return sz;
}

struct elf_note_info {
	struct memelfnote *notes;
	struct elf_prstatus *prstatus;	/* NT_PRSTATUS */
	struct elf_prpsinfo *psinfo;	/* NT_PRPSINFO */
	struct list_head thread_list;
	elf_fpregset_t *fpu;
#ifdef ELF_CORE_COPY_XFPREGS
	elf_fpxregset_t *xfpu;
#endif
	int thread_status_size;
	int numnote;
};

static int fill_note_info(struct elfhdr *elf, int phdrs,
			  struct elf_note_info *info,
			  long signr, struct pt_regs *regs)
{
#define	NUM_NOTES	6
	struct list_head *t;
	struct task_struct *g, *p;

	info->notes = NULL;
	info->prstatus = NULL;
	info->psinfo = NULL;
	info->fpu = NULL;
#ifdef ELF_CORE_COPY_XFPREGS
	info->xfpu = NULL;
#endif
	INIT_LIST_HEAD(&info->thread_list);

	info->notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote),
			      GFP_KERNEL);
	if (!info->notes)
		return 0;
	info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
	if (!info->psinfo)
		return 0;
	info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
	if (!info->prstatus)
		return 0;
	info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
	if (!info->fpu)
		return 0;
#ifdef ELF_CORE_COPY_XFPREGS
	info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
	if (!info->xfpu)
		return 0;
#endif

	info->thread_status_size = 0;
	if (signr) {
		struct elf_thread_status *tmp;
		rcu_read_lock();
		do_each_thread(g, p)
			if (current->mm == p->mm && current != p) {
				tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC);
				if (!tmp) {
					rcu_read_unlock();
					return 0;
				}
				tmp->thread = p;
				list_add(&tmp->list, &info->thread_list);
			}
		while_each_thread(g, p);
		rcu_read_unlock();
		list_for_each(t, &info->thread_list) {
			struct elf_thread_status *tmp;
			int sz;

			tmp = list_entry(t, struct elf_thread_status, list);
			sz = elf_dump_thread_status(signr, tmp);
			info->thread_status_size += sz;
		}
	}
	/* now collect the dump for the current */
	memset(info->prstatus, 0, sizeof(*info->prstatus));
	fill_prstatus(info->prstatus, current, signr);
	elf_core_copy_regs(&info->prstatus->pr_reg, regs);

	/* Set up header */
	fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS, ELF_OSABI);

	/*
	 * Set up the notes in similar form to SVR4 core dumps made
	 * with info from their /proc.
	 */

	fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
		  sizeof(*info->prstatus), info->prstatus);
	fill_psinfo(info->psinfo, current->group_leader, current->mm);
	fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
		  sizeof(*info->psinfo), info->psinfo);

	info->numnote = 2;

	fill_auxv_note(&info->notes[info->numnote++], current->mm);

	/* Try to dump the FPU. */
	info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
							       info->fpu);
	if (info->prstatus->pr_fpvalid)
		fill_note(info->notes + info->numnote++,
			  "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
#ifdef ELF_CORE_COPY_XFPREGS
	if (elf_core_copy_task_xfpregs(current, info->xfpu))
		fill_note(info->notes + info->numnote++,
			  "LINUX", ELF_CORE_XFPREG_TYPE,
			  sizeof(*info->xfpu), info->xfpu);
#endif

	return 1;

#undef NUM_NOTES
}
1797 
1798 static size_t get_note_info_size(struct elf_note_info *info)
1799 {
1800 	int sz = 0;
1801 	int i;
1802 
1803 	for (i = 0; i < info->numnote; i++)
1804 		sz += notesize(info->notes + i);
1805 
1806 	sz += info->thread_status_size;
1807 
1808 	return sz;
1809 }
1810 
1811 static int write_note_info(struct elf_note_info *info,
1812 			   struct file *file, loff_t *foffset)
1813 {
1814 	int i;
1815 	struct list_head *t;
1816 
1817 	for (i = 0; i < info->numnote; i++)
1818 		if (!writenote(info->notes + i, file, foffset))
1819 			return 0;
1820 
1821 	/* write out the thread status notes section */
1822 	list_for_each(t, &info->thread_list) {
1823 		struct elf_thread_status *tmp =
1824 				list_entry(t, struct elf_thread_status, list);
1825 
1826 		for (i = 0; i < tmp->num_notes; i++)
1827 			if (!writenote(&tmp->notes[i], file, foffset))
1828 				return 0;
1829 	}
1830 
1831 	return 1;
1832 }
1833 
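/*
 * Release everything fill_note_info() allocated.  fill_note_info()
 * NULLs all of these pointers up front and kfree(NULL) is a no-op,
 * so this is safe to call even after a partial allocation failure.
 */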
1834 static void free_note_info(struct elf_note_info *info)
1835 {
1836 	while (!list_empty(&info->thread_list)) {
1837 		struct list_head *tmp = info->thread_list.next;
1838 		list_del(tmp);
1839 		kfree(list_entry(tmp, struct elf_thread_status, list));
1840 	}
1841 
1842 	kfree(info->prstatus);
1843 	kfree(info->psinfo);
1844 	kfree(info->notes);
1845 	kfree(info->fpu);
1846 #ifdef ELF_CORE_COPY_XFPREGS
1847 	kfree(info->xfpu);
1848 #endif
1849 }
1850 
1851 #endif
1852 
1853 static struct vm_area_struct *first_vma(struct task_struct *tsk,
1854 					struct vm_area_struct *gate_vma)
1855 {
1856 	struct vm_area_struct *ret = tsk->mm->mmap;
1857 
1858 	if (ret)
1859 		return ret;
1860 	return gate_vma;
1861 }
1862 /*
1863  * Helper function for iterating across a vma list.  It ensures that the caller
1864  * will visit `gate_vma' prior to terminating the search.
1865  */
1866 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1867 					struct vm_area_struct *gate_vma)
1868 {
1869 	struct vm_area_struct *ret;
1870 
1871 	ret = this_vma->vm_next;
1872 	if (ret)
1873 		return ret;
1874 	if (this_vma == gate_vma)
1875 		return NULL;
1876 	return gate_vma;
1877 }
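
/*
 * Together, first_vma()/next_vma() give the traversal idiom used twice
 * in elf_core_dump() below:
 *
 *	for (vma = first_vma(current, gate_vma); vma != NULL;
 *			vma = next_vma(vma, gate_vma))
 *		...
 *
 * which walks mm->mmap in order and then visits the gate vma (if there
 * is one) exactly once at the end.
 */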
1878 
1879 /*
1880  * Actual dumper
1881  *
1882  * This is a two-pass process; first we compute the offsets of all the
1883  * pieces, and then the data is actually written out.  If we hit the
1884  * core limit, the dump is simply truncated.
1885  */
1886 static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
1887 {
1888 	int has_dumped = 0;
1889 	mm_segment_t fs;
1890 	int segs;
1891 	size_t size = 0;
1892 	struct vm_area_struct *vma, *gate_vma;
1893 	struct elfhdr *elf = NULL;
1894 	loff_t offset = 0, dataoff, foffset;
1895 	unsigned long mm_flags;
1896 	struct elf_note_info info;
1897 
1898 	/*
1899 	 * We no longer stop all VM operations.
1900 	 *
1901 	 * This is because any processes that could possibly change map_count
1902 	 * or the mmap / vma pages are now blocked in do_exit until current
1903 	 * finishes this core dump.
1904 	 *
1905 	 * Only ptrace can touch these memory addresses, but it doesn't change
1906 	 * the map_count or the pages allocated. So no possibility of crashing
1907 	 * exists while dumping the mm->vm_next areas to the core file.
1908 	 */
1909 
1910 	/* alloc memory for large data structures: too large to be on stack */
1911 	elf = kmalloc(sizeof(*elf), GFP_KERNEL);
1912 	if (!elf)
1913 		goto cleanup;
1914 
1915 	segs = current->mm->map_count;
1916 #ifdef ELF_CORE_EXTRA_PHDRS
1917 	segs += ELF_CORE_EXTRA_PHDRS;
1918 #endif
1919 
1920 	gate_vma = get_gate_vma(current);
1921 	if (gate_vma != NULL)
1922 		segs++;
1923 
1924 	/*
1925 	 * Collect all the non-memory information about the process for the
1926 	 * notes.  This also sets up the file header.
1927 	 */
1928 	if (!fill_note_info(elf, segs + 1, /* including notes section */
1929 			    &info, signr, regs))
1930 		goto cleanup;
1931 
1932 	has_dumped = 1;
1933 	current->flags |= PF_DUMPCORE;
1934 
1935 	fs = get_fs();
1936 	set_fs(KERNEL_DS);
1937 
1938 	DUMP_WRITE(elf, sizeof(*elf));
1939 	offset += sizeof(*elf);				/* Elf header */
1940 	offset += (segs + 1) * sizeof(struct elf_phdr); /* Program headers */
1941 	foffset = offset;
1942 
1943 	/* Write notes phdr entry */
1944 	{
1945 		struct elf_phdr phdr;
1946 		size_t sz = get_note_info_size(&info);
1947 
1948 		sz += elf_coredump_extra_notes_size();
1949 
1950 		fill_elf_note_phdr(&phdr, sz, offset);
1951 		offset += sz;
1952 		DUMP_WRITE(&phdr, sizeof(phdr));
1953 	}
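
	/*
	 * fill_elf_note_phdr(), used just above, is defined earlier in
	 * this file; the intent is a PT_NOTE program header covering the
	 * byte range [offset, offset + sz) of the file, i.e. roughly:
	 *
	 *	phdr->p_type = PT_NOTE;
	 *	phdr->p_offset = offset;
	 *	phdr->p_filesz = sz;
	 *
	 * with the remaining fields zeroed.
	 */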
1954 
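	/*
	 * The segment data area is page-aligned, so each PT_LOAD's
	 * p_offset stays congruent with its (page-aligned) p_vaddr,
	 * matching p_align below.
	 */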
1955 	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
1956 
1957 	/*
1958 	 * We must use the same mm->flags while dumping core to avoid
1959 	 * inconsistency between the program headers and bodies, otherwise an
1960 	 * unusable core file can be generated.
1961 	 */
1962 	mm_flags = current->mm->flags;
1963 
1964 	/* Write program headers for segments dump */
1965 	for (vma = first_vma(current, gate_vma); vma != NULL;
1966 			vma = next_vma(vma, gate_vma)) {
1967 		struct elf_phdr phdr;
1968 
1969 		phdr.p_type = PT_LOAD;
1970 		phdr.p_offset = offset;
1971 		phdr.p_vaddr = vma->vm_start;
1972 		phdr.p_paddr = 0;
1973 		phdr.p_filesz = vma_dump_size(vma, mm_flags);
1974 		phdr.p_memsz = vma->vm_end - vma->vm_start;
1975 		offset += phdr.p_filesz;
1976 		phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
1977 		if (vma->vm_flags & VM_WRITE)
1978 			phdr.p_flags |= PF_W;
1979 		if (vma->vm_flags & VM_EXEC)
1980 			phdr.p_flags |= PF_X;
1981 		phdr.p_align = ELF_EXEC_PAGESIZE;
1982 
1983 		DUMP_WRITE(&phdr, sizeof(phdr));
1984 	}
1985 
1986 #ifdef ELF_CORE_WRITE_EXTRA_PHDRS
1987 	ELF_CORE_WRITE_EXTRA_PHDRS;
1988 #endif
1989 
1990 	/* Write out the notes section */
1991 	if (!write_note_info(&info, file, &foffset))
1992 		goto end_coredump;
1993 
1994 	if (elf_coredump_extra_notes_write(file, &foffset))
1995 		goto end_coredump;
1996 
1997 	/* Align to page */
1998 	DUMP_SEEK(dataoff - foffset);
1999 
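	/*
	 * Dump the selected range of each vma, one page at a time.
	 * Never-touched anonymous pages come back from get_user_pages()
	 * as the shared ZERO_PAGE; for those we only seek, leaving a
	 * hole in the core file instead of writing a page of zeroes.
	 */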
2000 	for (vma = first_vma(current, gate_vma); vma != NULL;
2001 			vma = next_vma(vma, gate_vma)) {
2002 		unsigned long addr;
2003 		unsigned long end;
2004 
2005 		end = vma->vm_start + vma_dump_size(vma, mm_flags);
2006 
2007 		for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2008 			struct page *page;
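			/*
			 * Shadows the outer vma: get_user_pages() fills
			 * it in with the vma covering addr, for the
			 * flush_cache_page() call below.
			 */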
2009 			struct vm_area_struct *vma;
2010 
2011 			if (get_user_pages(current, current->mm, addr, 1, 0, 1,
2012 						&page, &vma) <= 0) {
2013 				DUMP_SEEK(PAGE_SIZE);
2014 			} else {
2015 				if (page == ZERO_PAGE(0)) {
2016 					if (!dump_seek(file, PAGE_SIZE)) {
2017 						page_cache_release(page);
2018 						goto end_coredump;
2019 					}
2020 				} else {
2021 					void *kaddr;
2022 					flush_cache_page(vma, addr,
2023 							 page_to_pfn(page));
2024 					kaddr = kmap(page);
2025 					if ((size += PAGE_SIZE) > limit ||
2026 					    !dump_write(file, kaddr,
2027 					    PAGE_SIZE)) {
2028 						kunmap(page);
2029 						page_cache_release(page);
2030 						goto end_coredump;
2031 					}
2032 					kunmap(page);
2033 				}
2034 				page_cache_release(page);
2035 			}
2036 		}
2037 	}
2038 
2039 #ifdef ELF_CORE_WRITE_EXTRA_DATA
2040 	ELF_CORE_WRITE_EXTRA_DATA;
2041 #endif
2042 
2043 end_coredump:
2044 	set_fs(fs);
2045 
2046 cleanup:
2047 	kfree(elf);
2048 	free_note_info(&info);
2049 	return has_dumped;
2050 }
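
/*
 * Resulting core file layout, for reference:
 *
 *	ELF header
 *	program headers: one PT_NOTE, one PT_LOAD per vma
 *	    (plus any ELF_CORE_EXTRA_PHDRS)
 *	note data: process notes first, then per-thread status notes
 *	segment data, page-aligned: vma_dump_size() bytes per vma
 */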
2051 
2052 #endif		/* USE_ELF_CORE_DUMP */
2053 
2054 static int __init init_elf_binfmt(void)
2055 {
2056 	return register_binfmt(&elf_format);
2057 }
2058 
2059 static void __exit exit_elf_binfmt(void)
2060 {
2061 	/* Remove the ELF loader. */
2062 	unregister_binfmt(&elf_format);
2063 }
2064 
2065 core_initcall(init_elf_binfmt);
2066 module_exit(exit_elf_binfmt);
2067 MODULE_LICENSE("GPL");
2068