xref: /linux/fs/binfmt_elf.c (revision 7ec7fb394298c212c30e063c57e0aa895efe9439)
1 /*
2  * linux/fs/binfmt_elf.c
3  *
4  * These are the functions used to load ELF format executables as used
5  * on SVr4 machines.  Information on the format may be found in the book
6  * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
7  * Tools".
8  *
9  * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
10  */
11 
12 #include <linux/module.h>
13 #include <linux/kernel.h>
14 #include <linux/fs.h>
15 #include <linux/stat.h>
16 #include <linux/time.h>
17 #include <linux/mm.h>
18 #include <linux/mman.h>
19 #include <linux/errno.h>
20 #include <linux/signal.h>
21 #include <linux/binfmts.h>
22 #include <linux/string.h>
23 #include <linux/file.h>
24 #include <linux/fcntl.h>
25 #include <linux/ptrace.h>
26 #include <linux/slab.h>
27 #include <linux/shm.h>
28 #include <linux/personality.h>
29 #include <linux/elfcore.h>
30 #include <linux/init.h>
31 #include <linux/highuid.h>
32 #include <linux/smp.h>
33 #include <linux/compiler.h>
34 #include <linux/highmem.h>
35 #include <linux/pagemap.h>
36 #include <linux/security.h>
37 #include <linux/syscalls.h>
38 #include <linux/random.h>
39 #include <linux/elf.h>
40 #include <linux/utsname.h>
41 #include <asm/uaccess.h>
42 #include <asm/param.h>
43 #include <asm/page.h>
44 
45 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
46 static int load_elf_library(struct file *);
47 static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
48 				int, int, unsigned long);
49 
50 /*
51  * If we don't support core dumping, then supply a NULL so we
52  * don't even try.
53  */
54 #if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
55 static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
56 #else
57 #define elf_core_dump	NULL
58 #endif
59 
60 #if ELF_EXEC_PAGESIZE > PAGE_SIZE
61 #define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
62 #else
63 #define ELF_MIN_ALIGN	PAGE_SIZE
64 #endif
65 
66 #ifndef ELF_CORE_EFLAGS
67 #define ELF_CORE_EFLAGS	0
68 #endif
69 
70 #define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
71 #define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
72 #define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
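
/*
 * Worked example (illustrative only, assuming ELF_MIN_ALIGN == 0x1000):
 * ELF_PAGESTART rounds an address down to its page start,
 * ELF_PAGEOFFSET extracts the offset within the page, and
 * ELF_PAGEALIGN rounds up to the next page boundary.
 */
#if 0	/* not part of the build */
BUILD_BUG_ON(ELF_PAGESTART(0x12345UL) != 0x12000UL);	/* round down */
BUILD_BUG_ON(ELF_PAGEOFFSET(0x12345UL) != 0x345UL);	/* in-page offset */
BUILD_BUG_ON(ELF_PAGEALIGN(0x12345UL) != 0x13000UL);	/* round up */
#endif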
73 
74 static struct linux_binfmt elf_format = {
75 		.module		= THIS_MODULE,
76 		.load_binary	= load_elf_binary,
77 		.load_shlib	= load_elf_library,
78 		.core_dump	= elf_core_dump,
79 		.min_coredump	= ELF_EXEC_PAGESIZE,
80 		.hasvdso	= 1
81 };
82 
83 #define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
84 
85 static int set_brk(unsigned long start, unsigned long end)
86 {
87 	start = ELF_PAGEALIGN(start);
88 	end = ELF_PAGEALIGN(end);
89 	if (end > start) {
90 		unsigned long addr;
91 		down_write(&current->mm->mmap_sem);
92 		addr = do_brk(start, end - start);
93 		up_write(&current->mm->mmap_sem);
94 		if (BAD_ADDR(addr))
95 			return addr;
96 	}
97 	current->mm->start_brk = current->mm->brk = end;
98 	return 0;
99 }
100 
101 /* We need to explicitly zero any fractional pages
102    after the data section (i.e. bss).  These would
103    otherwise contain junk from the file that should
104    not be in memory.
105  */
106 static int padzero(unsigned long elf_bss)
107 {
108 	unsigned long nbyte;
109 
110 	nbyte = ELF_PAGEOFFSET(elf_bss);
111 	if (nbyte) {
112 		nbyte = ELF_MIN_ALIGN - nbyte;
113 		if (clear_user((void __user *) elf_bss, nbyte))
114 			return -EFAULT;
115 	}
116 	return 0;
117 }
118 
119 /* Let's use some macros to make this stack manipulation a little clearer */
120 #ifdef CONFIG_STACK_GROWSUP
121 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
122 #define STACK_ROUND(sp, items) \
123 	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
124 #define STACK_ALLOC(sp, len) ({ \
125 	elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
126 	old_sp; })
127 #else
128 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
129 #define STACK_ROUND(sp, items) \
130 	(((unsigned long) (sp - items)) &~ 15UL)
131 #define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
132 #endif
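
/*
 * Illustrative sketch (not part of the build): on a downward-growing
 * stack, STACK_ALLOC reserves len bytes below sp and returns the new,
 * lower sp, while STACK_ROUND keeps the result 16-byte aligned.
 * The values below assume the !CONFIG_STACK_GROWSUP variants.
 */
#if 0
	unsigned long sp = 0x7fff0010;
	unsigned long str = STACK_ALLOC(sp, 6);	/* str == sp == 0x7fff000a */
	sp = STACK_ROUND(sp, 0);		/* rounded down to 0x7fff0000 */
#endif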
133 
134 #ifndef ELF_BASE_PLATFORM
135 /*
136  * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
137  * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
138  * will be copied to the user stack in the same manner as AT_PLATFORM.
139  */
140 #define ELF_BASE_PLATFORM NULL
141 #endif
142 
143 static int
144 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
145 		unsigned long load_addr, unsigned long interp_load_addr)
146 {
147 	unsigned long p = bprm->p;
148 	int argc = bprm->argc;
149 	int envc = bprm->envc;
150 	elf_addr_t __user *argv;
151 	elf_addr_t __user *envp;
152 	elf_addr_t __user *sp;
153 	elf_addr_t __user *u_platform;
154 	elf_addr_t __user *u_base_platform;
155 	const char *k_platform = ELF_PLATFORM;
156 	const char *k_base_platform = ELF_BASE_PLATFORM;
157 	int items;
158 	elf_addr_t *elf_info;
159 	int ei_index = 0;
160 	const struct cred *cred = current_cred();
161 	struct vm_area_struct *vma;
162 
163 	/*
164 	 * In some cases (e.g. Hyper-Threading), we want to avoid L1
165 	 * evictions by the processes running on the same package. One
166 	 * thing we can do is to shuffle the initial stack for them.
167 	 */
168 
169 	p = arch_align_stack(p);
170 
171 	/*
172 	 * If this architecture has a platform capability string, copy it
173 	 * to userspace.  In some cases (Sparc), this info is impossible
174 	 * for userspace to get any other way, in others (i386) it is
175 	 * merely difficult.
176 	 */
177 	u_platform = NULL;
178 	if (k_platform) {
179 		size_t len = strlen(k_platform) + 1;
180 
181 		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
182 		if (__copy_to_user(u_platform, k_platform, len))
183 			return -EFAULT;
184 	}
185 
186 	/*
187 	 * If this architecture has a "base" platform capability
188 	 * string, copy it to userspace.
189 	 */
190 	u_base_platform = NULL;
191 	if (k_base_platform) {
192 		size_t len = strlen(k_base_platform) + 1;
193 
194 		u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
195 		if (__copy_to_user(u_base_platform, k_base_platform, len))
196 			return -EFAULT;
197 	}
198 
199 	/* Create the ELF interpreter info */
200 	elf_info = (elf_addr_t *)current->mm->saved_auxv;
201 	/* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
202 #define NEW_AUX_ENT(id, val) \
203 	do { \
204 		elf_info[ei_index++] = id; \
205 		elf_info[ei_index++] = val; \
206 	} while (0)
207 
208 #ifdef ARCH_DLINFO
209 	/*
210 	 * ARCH_DLINFO must come first so PPC can do its special alignment of
211 	 * AUXV.
212 	 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
213 	 * ARCH_DLINFO changes
214 	 */
215 	ARCH_DLINFO;
216 #endif
217 	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
218 	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
219 	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
220 	NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
221 	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
222 	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
223 	NEW_AUX_ENT(AT_BASE, interp_load_addr);
224 	NEW_AUX_ENT(AT_FLAGS, 0);
225 	NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
226 	NEW_AUX_ENT(AT_UID, cred->uid);
227 	NEW_AUX_ENT(AT_EUID, cred->euid);
228 	NEW_AUX_ENT(AT_GID, cred->gid);
229 	NEW_AUX_ENT(AT_EGID, cred->egid);
230 	NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
231 	NEW_AUX_ENT(AT_EXECFN, bprm->exec);
232 	if (k_platform) {
233 		NEW_AUX_ENT(AT_PLATFORM,
234 			    (elf_addr_t)(unsigned long)u_platform);
235 	}
236 	if (k_base_platform) {
237 		NEW_AUX_ENT(AT_BASE_PLATFORM,
238 			    (elf_addr_t)(unsigned long)u_base_platform);
239 	}
240 	if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
241 		NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
242 	}
243 #undef NEW_AUX_ENT
244 	/* AT_NULL is zero; clear the rest too */
245 	memset(&elf_info[ei_index], 0,
246 	       sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
247 
248 	/* And advance past the AT_NULL entry.  */
249 	ei_index += 2;
250 
251 	sp = STACK_ADD(p, ei_index);
252 
253 	items = (argc + 1) + (envc + 1) + 1;
254 	bprm->p = STACK_ROUND(sp, items);
255 
256 	/* Point sp at the lowest address on the stack */
257 #ifdef CONFIG_STACK_GROWSUP
258 	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
259 	bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
260 #else
261 	sp = (elf_addr_t __user *)bprm->p;
262 #endif
263 
264 
265 	/*
266 	 * Grow the stack manually; some architectures have a limit on how
267 	 * far ahead a user-space access may be in order to grow the stack.
268 	 */
269 	vma = find_extend_vma(current->mm, bprm->p);
270 	if (!vma)
271 		return -EFAULT;
272 
273 	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
274 	if (__put_user(argc, sp++))
275 		return -EFAULT;
276 	argv = sp;
277 	envp = argv + argc + 1;
278 
279 	/* Populate argv and envp */
280 	p = current->mm->arg_end = current->mm->arg_start;
281 	while (argc-- > 0) {
282 		size_t len;
283 		if (__put_user((elf_addr_t)p, argv++))
284 			return -EFAULT;
285 		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
286 		if (!len || len > MAX_ARG_STRLEN)
287 			return -EINVAL;
288 		p += len;
289 	}
290 	if (__put_user(0, argv))
291 		return -EFAULT;
292 	current->mm->arg_end = current->mm->env_start = p;
293 	while (envc-- > 0) {
294 		size_t len;
295 		if (__put_user((elf_addr_t)p, envp++))
296 			return -EFAULT;
297 		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
298 		if (!len || len > MAX_ARG_STRLEN)
299 			return -EINVAL;
300 		p += len;
301 	}
302 	if (__put_user(0, envp))
303 		return -EFAULT;
304 	current->mm->env_end = p;
305 
306 	/* Put the elf_info on the stack in the right place.  */
307 	sp = (elf_addr_t __user *)envp + 1;
308 	if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
309 		return -EFAULT;
310 	return 0;
311 }
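
/*
 * Illustrative userspace sketch (not kernel code): the vector built by
 * create_elf_tables() can be inspected from the started program, since
 * the initial stack holds argc, argv[], NULL, envp[], NULL and then the
 * auxv pairs.  This assumes a 64-bit process; a real program would use
 * getauxval() where available.
 */
#if 0
#include <elf.h>
#include <stdio.h>

extern char **environ;

int main(void)
{
	char **p = environ;
	Elf64_auxv_t *aux;

	while (*p)			/* skip to the NULL ending envp */
		p++;
	for (aux = (Elf64_auxv_t *)(p + 1); aux->a_type != AT_NULL; aux++)
		if (aux->a_type == AT_PAGESZ)
			printf("AT_PAGESZ = %lu\n", aux->a_un.a_val);
	return 0;
}
#endif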
312 
313 #ifndef elf_map
314 
315 static unsigned long elf_map(struct file *filep, unsigned long addr,
316 		struct elf_phdr *eppnt, int prot, int type,
317 		unsigned long total_size)
318 {
319 	unsigned long map_addr;
320 	unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
321 	unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
322 	addr = ELF_PAGESTART(addr);
323 	size = ELF_PAGEALIGN(size);
324 
325 	/* mmap() will return -EINVAL if given a zero size, but a
326 	 * segment with zero filesize is perfectly valid */
327 	if (!size)
328 		return addr;
329 
330 	down_write(&current->mm->mmap_sem);
331 	/*
332 	 * total_size is the size of the ELF (interpreter) image.
333 	 * The _first_ mmap needs to know the full size, otherwise
334 	 * randomization might put this image into an overlapping
335 	 * position with the ELF binary image (since size < total_size).
336 	 * So we first map the 'big' image - and unmap the remainder at
337 	 * the end (this unmap is needed for ELF images with holes).
338 	 */
339 	if (total_size) {
340 		total_size = ELF_PAGEALIGN(total_size);
341 		map_addr = do_mmap(filep, addr, total_size, prot, type, off);
342 		if (!BAD_ADDR(map_addr))
343 			do_munmap(current->mm, map_addr+size, total_size-size);
344 	} else
345 		map_addr = do_mmap(filep, addr, size, prot, type, off);
346 
347 	up_write(&current->mm->mmap_sem);
348 	return map_addr;
349 }
350 
351 #endif /* !elf_map */
352 
353 static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
354 {
355 	int i, first_idx = -1, last_idx = -1;
356 
357 	for (i = 0; i < nr; i++) {
358 		if (cmds[i].p_type == PT_LOAD) {
359 			last_idx = i;
360 			if (first_idx == -1)
361 				first_idx = i;
362 		}
363 	}
364 	if (first_idx == -1)
365 		return 0;
366 
367 	return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
368 				ELF_PAGESTART(cmds[first_idx].p_vaddr);
369 }
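
/*
 * Worked example (illustrative values): an image with two PT_LOAD
 * entries,
 *	#0: p_vaddr = 0x0,     p_memsz = 0x20000
 *	#1: p_vaddr = 0x21000, p_memsz = 0x3000
 * yields 0x21000 + 0x3000 - ELF_PAGESTART(0x0) = 0x24000, i.e. the
 * span from the first segment's page start to the end of the last
 * segment, with any hole in between included.
 */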
370 
371 
372 /* This is much more generalized than the library routine read function,
373    so we keep this separate.  Technically the library read function
374    is only provided so that we can read a.out libraries that have
375    an ELF header. */
376 
377 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
378 		struct file *interpreter, unsigned long *interp_map_addr,
379 		unsigned long no_base)
380 {
381 	struct elf_phdr *elf_phdata;
382 	struct elf_phdr *eppnt;
383 	unsigned long load_addr = 0;
384 	int load_addr_set = 0;
385 	unsigned long last_bss = 0, elf_bss = 0;
386 	unsigned long error = ~0UL;
387 	unsigned long total_size;
388 	int retval, i, size;
389 
390 	/* First of all, some simple consistency checks */
391 	if (interp_elf_ex->e_type != ET_EXEC &&
392 	    interp_elf_ex->e_type != ET_DYN)
393 		goto out;
394 	if (!elf_check_arch(interp_elf_ex))
395 		goto out;
396 	if (!interpreter->f_op || !interpreter->f_op->mmap)
397 		goto out;
398 
399 	/*
400 	 * If the size of this structure has changed, then punt, since
401 	 * we will be doing the wrong thing.
402 	 */
403 	if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
404 		goto out;
405 	if (interp_elf_ex->e_phnum < 1 ||
406 		interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
407 		goto out;
408 
409 	/* Now read in all of the header information */
410 	size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
411 	if (size > ELF_MIN_ALIGN)
412 		goto out;
413 	elf_phdata = kmalloc(size, GFP_KERNEL);
414 	if (!elf_phdata)
415 		goto out;
416 
417 	retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
418 			     (char *)elf_phdata,size);
419 	error = -EIO;
420 	if (retval != size) {
421 		if (retval < 0)
422 			error = retval;
423 		goto out_close;
424 	}
425 
426 	total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
427 	if (!total_size) {
428 		error = -EINVAL;
429 		goto out_close;
430 	}
431 
432 	eppnt = elf_phdata;
433 	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
434 		if (eppnt->p_type == PT_LOAD) {
435 			int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
436 			int elf_prot = 0;
437 			unsigned long vaddr = 0;
438 			unsigned long k, map_addr;
439 
440 			if (eppnt->p_flags & PF_R)
441 				elf_prot |= PROT_READ;
442 			if (eppnt->p_flags & PF_W)
443 				elf_prot |= PROT_WRITE;
444 			if (eppnt->p_flags & PF_X)
445 				elf_prot |= PROT_EXEC;
446 			vaddr = eppnt->p_vaddr;
447 			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
448 				elf_type |= MAP_FIXED;
449 			else if (no_base && interp_elf_ex->e_type == ET_DYN)
450 				load_addr = -vaddr;
451 
452 			map_addr = elf_map(interpreter, load_addr + vaddr,
453 					eppnt, elf_prot, elf_type, total_size);
454 			total_size = 0;
455 			if (!*interp_map_addr)
456 				*interp_map_addr = map_addr;
457 			error = map_addr;
458 			if (BAD_ADDR(map_addr))
459 				goto out_close;
460 
461 			if (!load_addr_set &&
462 			    interp_elf_ex->e_type == ET_DYN) {
463 				load_addr = map_addr - ELF_PAGESTART(vaddr);
464 				load_addr_set = 1;
465 			}
466 
467 			/*
468 			 * Check to see if the section's size will overflow the
469 			 * allowed task size. Note that p_filesz must always be
470 			 * <= p_memsz so it's only necessary to check p_memsz.
471 			 */
472 			k = load_addr + eppnt->p_vaddr;
473 			if (BAD_ADDR(k) ||
474 			    eppnt->p_filesz > eppnt->p_memsz ||
475 			    eppnt->p_memsz > TASK_SIZE ||
476 			    TASK_SIZE - eppnt->p_memsz < k) {
477 				error = -ENOMEM;
478 				goto out_close;
479 			}
480 
481 			/*
482 			 * Find the end of the file mapping for this phdr, and
483 			 * keep track of the largest address we see for this.
484 			 */
485 			k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
486 			if (k > elf_bss)
487 				elf_bss = k;
488 
489 			/*
490 			 * Do the same thing for the memory mapping - between
491 			 * elf_bss and last_bss is the bss section.
492 			 */
493 			k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
494 			if (k > last_bss)
495 				last_bss = k;
496 		}
497 	}
498 
499 	/*
500 	 * Now fill out the bss section.  First pad the last page up
501 	 * to the page boundary, and then perform a mmap to make sure
502 	 * that there are zero-mapped pages up to and including the
503 	 * last bss page.
504 	 */
505 	if (padzero(elf_bss)) {
506 		error = -EFAULT;
507 		goto out_close;
508 	}
509 
510 	/* What we have mapped so far */
511 	elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
512 
513 	/* Map the last of the bss segment */
514 	if (last_bss > elf_bss) {
515 		down_write(&current->mm->mmap_sem);
516 		error = do_brk(elf_bss, last_bss - elf_bss);
517 		up_write(&current->mm->mmap_sem);
518 		if (BAD_ADDR(error))
519 			goto out_close;
520 	}
521 
522 	error = load_addr;
523 
524 out_close:
525 	kfree(elf_phdata);
526 out:
527 	return error;
528 }
529 
530 /*
531  * These are the functions used to load ELF style executables and shared
532  * libraries.  There is no binary dependent code anywhere else.
533  */
534 
535 #define INTERPRETER_NONE 0
536 #define INTERPRETER_ELF 2
537 
538 #ifndef STACK_RND_MASK
539 #define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))	/* 8MB of VA */
540 #endif
541 
542 static unsigned long randomize_stack_top(unsigned long stack_top)
543 {
544 	unsigned int random_variable = 0;
545 
546 	if ((current->flags & PF_RANDOMIZE) &&
547 		!(current->personality & ADDR_NO_RANDOMIZE)) {
548 		random_variable = get_random_int() & STACK_RND_MASK;
549 		random_variable <<= PAGE_SHIFT;
550 	}
551 #ifdef CONFIG_STACK_GROWSUP
552 	return PAGE_ALIGN(stack_top) + random_variable;
553 #else
554 	return PAGE_ALIGN(stack_top) - random_variable;
555 #endif
556 }
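
/*
 * Worked example: with 4K pages (PAGE_SHIFT == 12) the default
 * STACK_RND_MASK is 0x7ff, so random_variable is at most
 * 0x7ff << 12 == 0x7ff000 and the stack top moves by up to
 * 8MB - 4K, always in page-sized steps.
 */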
557 
558 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
559 {
560 	struct file *interpreter = NULL; /* to shut gcc up */
561 	unsigned long load_addr = 0, load_bias = 0;
562 	int load_addr_set = 0;
563 	char * elf_interpreter = NULL;
564 	unsigned long error;
565 	struct elf_phdr *elf_ppnt, *elf_phdata;
566 	unsigned long elf_bss, elf_brk;
567 	int elf_exec_fileno;
568 	int retval, i;
569 	unsigned int size;
570 	unsigned long elf_entry;
571 	unsigned long interp_load_addr = 0;
572 	unsigned long start_code, end_code, start_data, end_data;
573 	unsigned long reloc_func_desc = 0;
574 	int executable_stack = EXSTACK_DEFAULT;
575 	unsigned long def_flags = 0;
576 	struct {
577 		struct elfhdr elf_ex;
578 		struct elfhdr interp_elf_ex;
579 	} *loc;
580 
581 	loc = kmalloc(sizeof(*loc), GFP_KERNEL);
582 	if (!loc) {
583 		retval = -ENOMEM;
584 		goto out_ret;
585 	}
586 
587 	/* Get the exec-header */
588 	loc->elf_ex = *((struct elfhdr *)bprm->buf);
589 
590 	retval = -ENOEXEC;
591 	/* First of all, some simple consistency checks */
592 	if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
593 		goto out;
594 
595 	if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
596 		goto out;
597 	if (!elf_check_arch(&loc->elf_ex))
598 		goto out;
599 	if (!bprm->file->f_op || !bprm->file->f_op->mmap)
600 		goto out;
601 
602 	/* Now read in all of the header information */
603 	if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
604 		goto out;
605 	if (loc->elf_ex.e_phnum < 1 ||
606 	 	loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
607 		goto out;
608 	size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
609 	retval = -ENOMEM;
610 	elf_phdata = kmalloc(size, GFP_KERNEL);
611 	if (!elf_phdata)
612 		goto out;
613 
614 	retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
615 			     (char *)elf_phdata, size);
616 	if (retval != size) {
617 		if (retval >= 0)
618 			retval = -EIO;
619 		goto out_free_ph;
620 	}
621 
622 	retval = get_unused_fd();
623 	if (retval < 0)
624 		goto out_free_ph;
625 	get_file(bprm->file);
626 	fd_install(elf_exec_fileno = retval, bprm->file);
627 
628 	elf_ppnt = elf_phdata;
629 	elf_bss = 0;
630 	elf_brk = 0;
631 
632 	start_code = ~0UL;
633 	end_code = 0;
634 	start_data = 0;
635 	end_data = 0;
636 
637 	for (i = 0; i < loc->elf_ex.e_phnum; i++) {
638 		if (elf_ppnt->p_type == PT_INTERP) {
639 			/* This is the program interpreter used for
640 			 * shared libraries - for now assume that this
641 			 * is an a.out format binary
642 			 */
643 			retval = -ENOEXEC;
644 			if (elf_ppnt->p_filesz > PATH_MAX ||
645 			    elf_ppnt->p_filesz < 2)
646 				goto out_free_file;
647 
648 			retval = -ENOMEM;
649 			elf_interpreter = kmalloc(elf_ppnt->p_filesz,
650 						  GFP_KERNEL);
651 			if (!elf_interpreter)
652 				goto out_free_file;
653 
654 			retval = kernel_read(bprm->file, elf_ppnt->p_offset,
655 					     elf_interpreter,
656 					     elf_ppnt->p_filesz);
657 			if (retval != elf_ppnt->p_filesz) {
658 				if (retval >= 0)
659 					retval = -EIO;
660 				goto out_free_interp;
661 			}
662 			/* make sure path is NUL terminated */
663 			retval = -ENOEXEC;
664 			if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
665 				goto out_free_interp;
666 
667 			/*
668 			 * The early SET_PERSONALITY here is so that the lookup
669 			 * for the interpreter happens in the namespace of the
670 			 * to-be-execed image.  SET_PERSONALITY can select an
671 			 * alternate root.
672 			 *
673 			 * However, SET_PERSONALITY is NOT allowed to switch
674 			 * this task into the new image's memory mapping
675 			 * policy - that is, TASK_SIZE must still evaluate to
676 			 * that which is appropriate to the execing application.
677 			 * This is because exit_mmap() needs to have TASK_SIZE
678 			 * evaluate to the size of the old image.
679 			 *
680 			 * So if (say) a 64-bit application is execing a 32-bit
681 			 * application it is the architecture's responsibility
682 			 * to defer changing the value of TASK_SIZE until the
683 			 * switch really is going to happen - do this in
684 			 * flush_thread().	- akpm
685 			 */
686 			SET_PERSONALITY(loc->elf_ex);
687 
688 			interpreter = open_exec(elf_interpreter);
689 			retval = PTR_ERR(interpreter);
690 			if (IS_ERR(interpreter))
691 				goto out_free_interp;
692 
693 			/*
694 			 * If the binary is not readable then enforce
695 			 * mm->dumpable = 0 regardless of the interpreter's
696 			 * permissions.
697 			 */
698 			if (file_permission(interpreter, MAY_READ) < 0)
699 				bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
700 
701 			retval = kernel_read(interpreter, 0, bprm->buf,
702 					     BINPRM_BUF_SIZE);
703 			if (retval != BINPRM_BUF_SIZE) {
704 				if (retval >= 0)
705 					retval = -EIO;
706 				goto out_free_dentry;
707 			}
708 
709 			/* Get the exec headers */
710 			loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
711 			break;
712 		}
713 		elf_ppnt++;
714 	}
715 
716 	elf_ppnt = elf_phdata;
717 	for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
718 		if (elf_ppnt->p_type == PT_GNU_STACK) {
719 			if (elf_ppnt->p_flags & PF_X)
720 				executable_stack = EXSTACK_ENABLE_X;
721 			else
722 				executable_stack = EXSTACK_DISABLE_X;
723 			break;
724 		}
725 
726 	/* Some simple consistency checks for the interpreter */
727 	if (elf_interpreter) {
728 		retval = -ELIBBAD;
729 		/* Not an ELF interpreter */
730 		if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
731 			goto out_free_dentry;
732 		/* Verify the interpreter has a valid arch */
733 		if (!elf_check_arch(&loc->interp_elf_ex))
734 			goto out_free_dentry;
735 	} else {
736 		/* Executables without an interpreter also need a personality  */
737 		SET_PERSONALITY(loc->elf_ex);
738 	}
739 
740 	/* Flush all traces of the currently running executable */
741 	retval = flush_old_exec(bprm);
742 	if (retval)
743 		goto out_free_dentry;
744 
745 	/* OK, This is the point of no return */
746 	current->flags &= ~PF_FORKNOEXEC;
747 	current->mm->def_flags = def_flags;
748 
749 	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
750 	   may depend on the personality.  */
751 	SET_PERSONALITY(loc->elf_ex);
752 	if (elf_read_implies_exec(loc->elf_ex, executable_stack))
753 		current->personality |= READ_IMPLIES_EXEC;
754 
755 	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
756 		current->flags |= PF_RANDOMIZE;
757 	arch_pick_mmap_layout(current->mm);
758 
759 	/* Do this so that we can load the interpreter, if need be.  We will
760 	   change some of these later */
761 	current->mm->free_area_cache = current->mm->mmap_base;
762 	current->mm->cached_hole_size = 0;
763 	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
764 				 executable_stack);
765 	if (retval < 0) {
766 		send_sig(SIGKILL, current, 0);
767 		goto out_free_dentry;
768 	}
769 
770 	current->mm->start_stack = bprm->p;
771 
772 	/* Now we do a little grungy work by mmaping the ELF image into
773 	   the correct location in memory. */
774 	for (i = 0, elf_ppnt = elf_phdata;
775 	    i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
776 		int elf_prot = 0, elf_flags;
777 		unsigned long k, vaddr;
778 
779 		if (elf_ppnt->p_type != PT_LOAD)
780 			continue;
781 
782 		if (unlikely(elf_brk > elf_bss)) {
783 			unsigned long nbyte;
784 
785 			/* There was a PT_LOAD segment with p_memsz > p_filesz
786 			   before this one. Map anonymous pages, if needed,
787 			   and clear the area.  */
788 			retval = set_brk(elf_bss + load_bias,
789 					  elf_brk + load_bias);
790 			if (retval) {
791 				send_sig(SIGKILL, current, 0);
792 				goto out_free_dentry;
793 			}
794 			nbyte = ELF_PAGEOFFSET(elf_bss);
795 			if (nbyte) {
796 				nbyte = ELF_MIN_ALIGN - nbyte;
797 				if (nbyte > elf_brk - elf_bss)
798 					nbyte = elf_brk - elf_bss;
799 				if (clear_user((void __user *)elf_bss +
800 							load_bias, nbyte)) {
801 					/*
802 					 * This bss-zeroing can fail if the ELF
803 					 * file specifies odd protections. So
804 					 * we don't check the return value
805 					 */
806 				}
807 			}
808 		}
809 
810 		if (elf_ppnt->p_flags & PF_R)
811 			elf_prot |= PROT_READ;
812 		if (elf_ppnt->p_flags & PF_W)
813 			elf_prot |= PROT_WRITE;
814 		if (elf_ppnt->p_flags & PF_X)
815 			elf_prot |= PROT_EXEC;
816 
817 		elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
818 
819 		vaddr = elf_ppnt->p_vaddr;
820 		if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
821 			elf_flags |= MAP_FIXED;
822 		} else if (loc->elf_ex.e_type == ET_DYN) {
823 			/* Try and get dynamic programs out of the way of the
824 			 * default mmap base, as well as whatever program they
825 			 * might try to exec.  This is because the brk will
826 			 * follow the loader, and is not movable.  */
827 #ifdef CONFIG_X86
828 			load_bias = 0;
829 #else
830 			load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
831 #endif
832 		}
833 
834 		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
835 				elf_prot, elf_flags, 0);
836 		if (BAD_ADDR(error)) {
837 			send_sig(SIGKILL, current, 0);
838 			retval = IS_ERR((void *)error) ?
839 				PTR_ERR((void *)error) : -EINVAL;
840 			goto out_free_dentry;
841 		}
842 
843 		if (!load_addr_set) {
844 			load_addr_set = 1;
845 			load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
846 			if (loc->elf_ex.e_type == ET_DYN) {
847 				load_bias += error -
848 				             ELF_PAGESTART(load_bias + vaddr);
849 				load_addr += load_bias;
850 				reloc_func_desc = load_bias;
851 			}
852 		}
853 		k = elf_ppnt->p_vaddr;
854 		if (k < start_code)
855 			start_code = k;
856 		if (start_data < k)
857 			start_data = k;
858 
859 		/*
860 		 * Check to see if the section's size will overflow the
861 		 * allowed task size. Note that p_filesz must always be
862 		 * <= p_memsz so it is only necessary to check p_memsz.
863 		 */
864 		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
865 		    elf_ppnt->p_memsz > TASK_SIZE ||
866 		    TASK_SIZE - elf_ppnt->p_memsz < k) {
867 			/* set_brk can never work. Avoid overflows. */
868 			send_sig(SIGKILL, current, 0);
869 			retval = -EINVAL;
870 			goto out_free_dentry;
871 		}
872 
873 		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
874 
875 		if (k > elf_bss)
876 			elf_bss = k;
877 		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
878 			end_code = k;
879 		if (end_data < k)
880 			end_data = k;
881 		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
882 		if (k > elf_brk)
883 			elf_brk = k;
884 	}
885 
886 	loc->elf_ex.e_entry += load_bias;
887 	elf_bss += load_bias;
888 	elf_brk += load_bias;
889 	start_code += load_bias;
890 	end_code += load_bias;
891 	start_data += load_bias;
892 	end_data += load_bias;
893 
894 	/* Calling set_brk effectively mmaps the pages that we need
895 	 * for the bss and break sections.  We must do this before
896 	 * mapping in the interpreter, to make sure it doesn't wind
897 	 * up getting placed where the bss needs to go.
898 	 */
899 	retval = set_brk(elf_bss, elf_brk);
900 	if (retval) {
901 		send_sig(SIGKILL, current, 0);
902 		goto out_free_dentry;
903 	}
904 	if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
905 		send_sig(SIGSEGV, current, 0);
906 		retval = -EFAULT; /* Nobody gets to see this, but.. */
907 		goto out_free_dentry;
908 	}
909 
910 	if (elf_interpreter) {
911 		unsigned long uninitialized_var(interp_map_addr);
912 
913 		elf_entry = load_elf_interp(&loc->interp_elf_ex,
914 					    interpreter,
915 					    &interp_map_addr,
916 					    load_bias);
917 		if (!IS_ERR((void *)elf_entry)) {
918 			/*
919 			 * load_elf_interp() returns relocation
920 			 * adjustment
921 			 */
922 			interp_load_addr = elf_entry;
923 			elf_entry += loc->interp_elf_ex.e_entry;
924 		}
925 		if (BAD_ADDR(elf_entry)) {
926 			force_sig(SIGSEGV, current);
927 			retval = IS_ERR((void *)elf_entry) ?
928 					(int)elf_entry : -EINVAL;
929 			goto out_free_dentry;
930 		}
931 		reloc_func_desc = interp_load_addr;
932 
933 		allow_write_access(interpreter);
934 		fput(interpreter);
935 		kfree(elf_interpreter);
936 	} else {
937 		elf_entry = loc->elf_ex.e_entry;
938 		if (BAD_ADDR(elf_entry)) {
939 			force_sig(SIGSEGV, current);
940 			retval = -EINVAL;
941 			goto out_free_dentry;
942 		}
943 	}
944 
945 	kfree(elf_phdata);
946 
947 	sys_close(elf_exec_fileno);
948 
949 	set_binfmt(&elf_format);
950 
951 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
952 	retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
953 	if (retval < 0) {
954 		send_sig(SIGKILL, current, 0);
955 		goto out;
956 	}
957 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
958 
959 	install_exec_creds(bprm);
960 	current->flags &= ~PF_FORKNOEXEC;
961 	retval = create_elf_tables(bprm, &loc->elf_ex,
962 			  load_addr, interp_load_addr);
963 	if (retval < 0) {
964 		send_sig(SIGKILL, current, 0);
965 		goto out;
966 	}
967 	/* N.B. passed_fileno might not be initialized? */
968 	current->mm->end_code = end_code;
969 	current->mm->start_code = start_code;
970 	current->mm->start_data = start_data;
971 	current->mm->end_data = end_data;
972 	current->mm->start_stack = bprm->p;
973 
974 #ifdef arch_randomize_brk
975 	if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1))
976 		current->mm->brk = current->mm->start_brk =
977 			arch_randomize_brk(current->mm);
978 #endif
979 
980 	if (current->personality & MMAP_PAGE_ZERO) {
981 		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
982 		   and some applications "depend" upon this behavior.
983 		   Since we do not have the power to recompile these, we
984 		   emulate the SVr4 behavior. Sigh. */
985 		down_write(&current->mm->mmap_sem);
986 		error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
987 				MAP_FIXED | MAP_PRIVATE, 0);
988 		up_write(&current->mm->mmap_sem);
989 	}
990 
991 #ifdef ELF_PLAT_INIT
992 	/*
993 	 * The ABI may specify that certain registers be set up in special
994 	 * ways (on i386 %edx is the address of a DT_FINI function, for
995 	 * example).  In addition, it may also specify (e.g. PowerPC64 ELF)
996 	 * that the e_entry field is the address of the function descriptor
997 	 * for the startup routine, rather than the address of the startup
998 	 * routine itself.  This macro performs whatever initialization to
999 	 * the regs structure is required as well as any relocations to the
1000 	 * function descriptor entries when executing dynamically linked apps.
1001 	 */
1002 	ELF_PLAT_INIT(regs, reloc_func_desc);
1003 #endif
1004 
1005 	start_thread(regs, elf_entry, bprm->p);
1006 	retval = 0;
1007 out:
1008 	kfree(loc);
1009 out_ret:
1010 	return retval;
1011 
1012 	/* error cleanup */
1013 out_free_dentry:
1014 	allow_write_access(interpreter);
1015 	if (interpreter)
1016 		fput(interpreter);
1017 out_free_interp:
1018 	kfree(elf_interpreter);
1019 out_free_file:
1020 	sys_close(elf_exec_fileno);
1021 out_free_ph:
1022 	kfree(elf_phdata);
1023 	goto out;
1024 }
1025 
1026 /* This is really simpleminded and specialized - we are loading an
1027    a.out library that is given an ELF header. */
1028 static int load_elf_library(struct file *file)
1029 {
1030 	struct elf_phdr *elf_phdata;
1031 	struct elf_phdr *eppnt;
1032 	unsigned long elf_bss, bss, len;
1033 	int retval, error, i, j;
1034 	struct elfhdr elf_ex;
1035 
1036 	error = -ENOEXEC;
1037 	retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1038 	if (retval != sizeof(elf_ex))
1039 		goto out;
1040 
1041 	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1042 		goto out;
1043 
1044 	/* First of all, some simple consistency checks */
1045 	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1046 	    !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
1047 		goto out;
1048 
1049 	/* Now read in all of the header information */
1050 
1051 	j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1052 	/* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1053 
1054 	error = -ENOMEM;
1055 	elf_phdata = kmalloc(j, GFP_KERNEL);
1056 	if (!elf_phdata)
1057 		goto out;
1058 
1059 	eppnt = elf_phdata;
1060 	error = -ENOEXEC;
1061 	retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1062 	if (retval != j)
1063 		goto out_free_ph;
1064 
1065 	for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
1066 		if ((eppnt + i)->p_type == PT_LOAD)
1067 			j++;
1068 	if (j != 1)
1069 		goto out_free_ph;
1070 
1071 	while (eppnt->p_type != PT_LOAD)
1072 		eppnt++;
1073 
1074 	/* Now use mmap to map the library into memory. */
1075 	down_write(&current->mm->mmap_sem);
1076 	error = do_mmap(file,
1077 			ELF_PAGESTART(eppnt->p_vaddr),
1078 			(eppnt->p_filesz +
1079 			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
1080 			PROT_READ | PROT_WRITE | PROT_EXEC,
1081 			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1082 			(eppnt->p_offset -
1083 			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
1084 	up_write(&current->mm->mmap_sem);
1085 	if (error != ELF_PAGESTART(eppnt->p_vaddr))
1086 		goto out_free_ph;
1087 
1088 	elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1089 	if (padzero(elf_bss)) {
1090 		error = -EFAULT;
1091 		goto out_free_ph;
1092 	}
1093 
1094 	len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1095 			    ELF_MIN_ALIGN - 1);
1096 	bss = eppnt->p_memsz + eppnt->p_vaddr;
1097 	if (bss > len) {
1098 		down_write(&current->mm->mmap_sem);
1099 		do_brk(len, bss - len);
1100 		up_write(&current->mm->mmap_sem);
1101 	}
1102 	error = 0;
1103 
1104 out_free_ph:
1105 	kfree(elf_phdata);
1106 out:
1107 	return error;
1108 }
1109 
1110 /*
1111  * Note that some platforms still use traditional core dumps and not
1112  * the ELF core dump.  Each platform can select it as appropriate.
1113  */
1114 #if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
1115 
1116 /*
1117  * ELF core dumper
1118  *
1119  * Modelled on fs/exec.c:aout_core_dump()
1120  * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1121  */
1122 /*
1123  * These are the only things you should do on a core-file: use only these
1124  * functions to write out all the necessary info.
1125  */
1126 static int dump_write(struct file *file, const void *addr, int nr)
1127 {
1128 	return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
1129 }
1130 
1131 static int dump_seek(struct file *file, loff_t off)
1132 {
1133 	if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
1134 		if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
1135 			return 0;
1136 	} else {
1137 		char *buf = (char *)get_zeroed_page(GFP_KERNEL);
1138 		if (!buf)
1139 			return 0;
1140 		while (off > 0) {
1141 			unsigned long n = min(off, (loff_t)PAGE_SIZE);
1142 			if (!dump_write(file, buf, n)) {
1143 				free_page((unsigned long)buf);
1144 				return 0;
1145 			}
1146 			off -= n;
1147 		}
1148 		free_page((unsigned long)buf);
1149 	}
1150 	return 1;
1151 }
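
/*
 * Illustrative userspace sketch (not kernel code): seeking past the
 * current end of a file before writing creates a hole, which is why
 * dump_seek() prefers llseek and a seekable core file can be sparse,
 * while non-seekable targets need the explicit zero-filled writes.
 */
#if 0
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	int fd = open("sparse", O_CREAT | O_WRONLY | O_TRUNC, 0644);

	if (fd < 0)
		return 1;
	lseek(fd, 1 << 20, SEEK_CUR);	/* 1MB hole, no blocks allocated */
	write(fd, "x", 1);		/* file size is now 1MB + 1 */
	close(fd);
	return 0;
}
#endif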
1152 
1153 /*
1154  * Decide what to dump of a segment, part, all or none.
1155  */
1156 static unsigned long vma_dump_size(struct vm_area_struct *vma,
1157 				   unsigned long mm_flags)
1158 {
1159 #define FILTER(type)	(mm_flags & (1UL << MMF_DUMP_##type))
1160 
1161 	/* The vma can be set up to tell us the answer directly.  */
1162 	if (vma->vm_flags & VM_ALWAYSDUMP)
1163 		goto whole;
1164 
1165 	/* Hugetlb memory check */
1166 	if (vma->vm_flags & VM_HUGETLB) {
1167 		if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1168 			goto whole;
1169 		if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1170 			goto whole;
1171 	}
1172 
1173 	/* Do not dump I/O mapped devices or special mappings */
1174 	if (vma->vm_flags & (VM_IO | VM_RESERVED))
1175 		return 0;
1176 
1177 	/* By default, dump shared memory if mapped from an anonymous file. */
1178 	if (vma->vm_flags & VM_SHARED) {
1179 		if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ?
1180 		    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1181 			goto whole;
1182 		return 0;
1183 	}
1184 
1185 	/* Dump segments that have been written to.  */
1186 	if (vma->anon_vma && FILTER(ANON_PRIVATE))
1187 		goto whole;
1188 	if (vma->vm_file == NULL)
1189 		return 0;
1190 
1191 	if (FILTER(MAPPED_PRIVATE))
1192 		goto whole;
1193 
1194 	/*
1195 	 * If this looks like the beginning of a DSO or executable mapping,
1196 	 * check for an ELF header.  If we find one, dump the first page to
1197 	 * aid in determining what was mapped here.
1198 	 */
1199 	if (FILTER(ELF_HEADERS) && vma->vm_file != NULL && vma->vm_pgoff == 0) {
1200 		u32 __user *header = (u32 __user *) vma->vm_start;
1201 		u32 word;
1202 		/*
1203 		 * Doing it this way gets the constant folded by GCC.
1204 		 */
1205 		union {
1206 			u32 cmp;
1207 			char elfmag[SELFMAG];
1208 		} magic;
1209 		BUILD_BUG_ON(SELFMAG != sizeof word);
1210 		magic.elfmag[EI_MAG0] = ELFMAG0;
1211 		magic.elfmag[EI_MAG1] = ELFMAG1;
1212 		magic.elfmag[EI_MAG2] = ELFMAG2;
1213 		magic.elfmag[EI_MAG3] = ELFMAG3;
1214 		if (get_user(word, header) == 0 && word == magic.cmp)
1215 			return PAGE_SIZE;
1216 	}
1217 
1218 #undef	FILTER
1219 
1220 	return 0;
1221 
1222 whole:
1223 	return vma->vm_end - vma->vm_start;
1224 }
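
/*
 * Illustrative userspace sketch (not kernel code): the union trick
 * above lets the four ELF magic bytes be compared with a single u32
 * load that the compiler folds to a constant.  The same idiom outside
 * the kernel:
 */
#if 0
#include <elf.h>
#include <stdint.h>
#include <string.h>

static int looks_like_elf(const void *start)
{
	union {
		uint32_t cmp;
		char elfmag[SELFMAG];
	} magic;
	uint32_t word;

	magic.elfmag[EI_MAG0] = ELFMAG0;
	magic.elfmag[EI_MAG1] = ELFMAG1;
	magic.elfmag[EI_MAG2] = ELFMAG2;
	magic.elfmag[EI_MAG3] = ELFMAG3;
	memcpy(&word, start, sizeof(word));
	return word == magic.cmp;
}
#endif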
1225 
1226 /* An ELF note in memory */
1227 struct memelfnote
1228 {
1229 	const char *name;
1230 	int type;
1231 	unsigned int datasz;
1232 	void *data;
1233 };
1234 
1235 static int notesize(struct memelfnote *en)
1236 {
1237 	int sz;
1238 
1239 	sz = sizeof(struct elf_note);
1240 	sz += roundup(strlen(en->name) + 1, 4);
1241 	sz += roundup(en->datasz, 4);
1242 
1243 	return sz;
1244 }
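
/*
 * Worked example: a "CORE"/NT_PRSTATUS note has n_namesz == 5, which
 * rounds up to 8 bytes of name data, so its total size is
 *	sizeof(struct elf_note) + 8 + roundup(sizeof(struct elf_prstatus), 4)
 * where the elf_note header itself is three 4-byte words (n_namesz,
 * n_descsz, n_type).
 */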
1245 
1246 #define DUMP_WRITE(addr, nr, foffset)	\
1247 	do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)
1248 
1249 static int alignfile(struct file *file, loff_t *foffset)
1250 {
1251 	static const char buf[4] = { 0, };
1252 	DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
1253 	return 1;
1254 }
1255 
1256 static int writenote(struct memelfnote *men, struct file *file,
1257 			loff_t *foffset)
1258 {
1259 	struct elf_note en;
1260 	en.n_namesz = strlen(men->name) + 1;
1261 	en.n_descsz = men->datasz;
1262 	en.n_type = men->type;
1263 
1264 	DUMP_WRITE(&en, sizeof(en), foffset);
1265 	DUMP_WRITE(men->name, en.n_namesz, foffset);
1266 	if (!alignfile(file, foffset))
1267 		return 0;
1268 	DUMP_WRITE(men->data, men->datasz, foffset);
1269 	if (!alignfile(file, foffset))
1270 		return 0;
1271 
1272 	return 1;
1273 }
1274 #undef DUMP_WRITE
1275 
1276 #define DUMP_WRITE(addr, nr)	\
1277 	if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
1278 		goto end_coredump;
1279 #define DUMP_SEEK(off)	\
1280 	if (!dump_seek(file, (off))) \
1281 		goto end_coredump;
1282 
1283 static void fill_elf_header(struct elfhdr *elf, int segs,
1284 			    u16 machine, u32 flags, u8 osabi)
1285 {
1286 	memset(elf, 0, sizeof(*elf));
1287 
1288 	memcpy(elf->e_ident, ELFMAG, SELFMAG);
1289 	elf->e_ident[EI_CLASS] = ELF_CLASS;
1290 	elf->e_ident[EI_DATA] = ELF_DATA;
1291 	elf->e_ident[EI_VERSION] = EV_CURRENT;
1292 	elf->e_ident[EI_OSABI] = ELF_OSABI;
1293 
1294 	elf->e_type = ET_CORE;
1295 	elf->e_machine = machine;
1296 	elf->e_version = EV_CURRENT;
1297 	elf->e_phoff = sizeof(struct elfhdr);
1298 	elf->e_flags = flags;
1299 	elf->e_ehsize = sizeof(struct elfhdr);
1300 	elf->e_phentsize = sizeof(struct elf_phdr);
1301 	elf->e_phnum = segs;
1302 
1303 	return;
1304 }
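
/*
 * Illustrative userspace sketch (not kernel code): reading back the
 * header that fill_elf_header() wrote at the start of a core file.
 * Assumes a 64-bit core; "core" is a placeholder path.
 */
#if 0
#include <elf.h>
#include <stdio.h>

int main(void)
{
	Elf64_Ehdr eh;
	FILE *f = fopen("core", "rb");

	if (f && fread(&eh, sizeof(eh), 1, f) == 1)
		printf("e_type=%u (ET_CORE=%u) e_phnum=%u\n",
		       eh.e_type, ET_CORE, eh.e_phnum);
	if (f)
		fclose(f);
	return 0;
}
#endif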
1305 
1306 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1307 {
1308 	phdr->p_type = PT_NOTE;
1309 	phdr->p_offset = offset;
1310 	phdr->p_vaddr = 0;
1311 	phdr->p_paddr = 0;
1312 	phdr->p_filesz = sz;
1313 	phdr->p_memsz = 0;
1314 	phdr->p_flags = 0;
1315 	phdr->p_align = 0;
1316 	return;
1317 }
1318 
1319 static void fill_note(struct memelfnote *note, const char *name, int type,
1320 		unsigned int sz, void *data)
1321 {
1322 	note->name = name;
1323 	note->type = type;
1324 	note->datasz = sz;
1325 	note->data = data;
1326 	return;
1327 }
1328 
1329 /*
1330  * fill up all the fields in prstatus from the given task struct, except
1331  * registers which need to be filled up separately.
1332  */
1333 static void fill_prstatus(struct elf_prstatus *prstatus,
1334 		struct task_struct *p, long signr)
1335 {
1336 	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1337 	prstatus->pr_sigpend = p->pending.signal.sig[0];
1338 	prstatus->pr_sighold = p->blocked.sig[0];
1339 	prstatus->pr_pid = task_pid_vnr(p);
1340 	prstatus->pr_ppid = task_pid_vnr(p->real_parent);
1341 	prstatus->pr_pgrp = task_pgrp_vnr(p);
1342 	prstatus->pr_sid = task_session_vnr(p);
1343 	if (thread_group_leader(p)) {
1344 		struct task_cputime cputime;
1345 
1346 		/*
1347 		 * This is the record for the group leader.  It shows the
1348 		 * group-wide total, not its individual thread total.
1349 		 */
1350 		thread_group_cputime(p, &cputime);
1351 		cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1352 		cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1353 	} else {
1354 		cputime_to_timeval(p->utime, &prstatus->pr_utime);
1355 		cputime_to_timeval(p->stime, &prstatus->pr_stime);
1356 	}
1357 	cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1358 	cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1359 }
1360 
1361 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1362 		       struct mm_struct *mm)
1363 {
1364 	const struct cred *cred;
1365 	unsigned int i, len;
1366 
1367 	/* first copy the parameters from user space */
1368 	memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1369 
1370 	len = mm->arg_end - mm->arg_start;
1371 	if (len >= ELF_PRARGSZ)
1372 		len = ELF_PRARGSZ-1;
1373 	if (copy_from_user(&psinfo->pr_psargs,
1374 		           (const char __user *)mm->arg_start, len))
1375 		return -EFAULT;
1376 	for(i = 0; i < len; i++)
1377 		if (psinfo->pr_psargs[i] == 0)
1378 			psinfo->pr_psargs[i] = ' ';
1379 	psinfo->pr_psargs[len] = 0;
1380 
1381 	psinfo->pr_pid = task_pid_vnr(p);
1382 	psinfo->pr_ppid = task_pid_vnr(p->real_parent);
1383 	psinfo->pr_pgrp = task_pgrp_vnr(p);
1384 	psinfo->pr_sid = task_session_vnr(p);
1385 
1386 	i = p->state ? ffz(~p->state) + 1 : 0;
1387 	psinfo->pr_state = i;
1388 	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1389 	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1390 	psinfo->pr_nice = task_nice(p);
1391 	psinfo->pr_flag = p->flags;
1392 	rcu_read_lock();
1393 	cred = __task_cred(p);
1394 	SET_UID(psinfo->pr_uid, cred->uid);
1395 	SET_GID(psinfo->pr_gid, cred->gid);
1396 	rcu_read_unlock();
1397 	strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1398 
1399 	return 0;
1400 }
1401 
1402 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1403 {
1404 	elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1405 	int i = 0;
1406 	do
1407 		i += 2;
1408 	while (auxv[i - 2] != AT_NULL);
1409 	fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1410 }
1411 
1412 #ifdef CORE_DUMP_USE_REGSET
1413 #include <linux/regset.h>
1414 
1415 struct elf_thread_core_info {
1416 	struct elf_thread_core_info *next;
1417 	struct task_struct *task;
1418 	struct elf_prstatus prstatus;
1419 	struct memelfnote notes[0];
1420 };
1421 
1422 struct elf_note_info {
1423 	struct elf_thread_core_info *thread;
1424 	struct memelfnote psinfo;
1425 	struct memelfnote auxv;
1426 	size_t size;
1427 	int thread_notes;
1428 };
1429 
1430 /*
1431  * When a regset has a writeback hook, we call it on each thread before
1432  * dumping user memory.  On register window machines, this makes sure the
1433  * user memory backing the register data is up to date before we read it.
1434  */
1435 static void do_thread_regset_writeback(struct task_struct *task,
1436 				       const struct user_regset *regset)
1437 {
1438 	if (regset->writeback)
1439 		regset->writeback(task, regset, 1);
1440 }
1441 
1442 static int fill_thread_core_info(struct elf_thread_core_info *t,
1443 				 const struct user_regset_view *view,
1444 				 long signr, size_t *total)
1445 {
1446 	unsigned int i;
1447 
1448 	/*
1449 	 * NT_PRSTATUS is the one special case, because the regset data
1450 	 * goes into the pr_reg field inside the note contents, rather
1451 	 * than being the whole note contents.  We fill the rest in here.
1452 	 * We assume that regset 0 is NT_PRSTATUS.
1453 	 */
1454 	fill_prstatus(&t->prstatus, t->task, signr);
1455 	(void) view->regsets[0].get(t->task, &view->regsets[0],
1456 				    0, sizeof(t->prstatus.pr_reg),
1457 				    &t->prstatus.pr_reg, NULL);
1458 
1459 	fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1460 		  sizeof(t->prstatus), &t->prstatus);
1461 	*total += notesize(&t->notes[0]);
1462 
1463 	do_thread_regset_writeback(t->task, &view->regsets[0]);
1464 
1465 	/*
1466 	 * Each other regset might generate a note too.  For each regset
1467 	 * that has no core_note_type or is inactive, we leave t->notes[i]
1468 	 * all zero and we'll know to skip writing it later.
1469 	 */
1470 	for (i = 1; i < view->n; ++i) {
1471 		const struct user_regset *regset = &view->regsets[i];
1472 		do_thread_regset_writeback(t->task, regset);
1473 		if (regset->core_note_type &&
1474 		    (!regset->active || regset->active(t->task, regset))) {
1475 			int ret;
1476 			size_t size = regset->n * regset->size;
1477 			void *data = kmalloc(size, GFP_KERNEL);
1478 			if (unlikely(!data))
1479 				return 0;
1480 			ret = regset->get(t->task, regset,
1481 					  0, size, data, NULL);
1482 			if (unlikely(ret))
1483 				kfree(data);
1484 			else {
1485 				if (regset->core_note_type != NT_PRFPREG)
1486 					fill_note(&t->notes[i], "LINUX",
1487 						  regset->core_note_type,
1488 						  size, data);
1489 				else {
1490 					t->prstatus.pr_fpvalid = 1;
1491 					fill_note(&t->notes[i], "CORE",
1492 						  NT_PRFPREG, size, data);
1493 				}
1494 				*total += notesize(&t->notes[i]);
1495 			}
1496 		}
1497 	}
1498 
1499 	return 1;
1500 }
1501 
1502 static int fill_note_info(struct elfhdr *elf, int phdrs,
1503 			  struct elf_note_info *info,
1504 			  long signr, struct pt_regs *regs)
1505 {
1506 	struct task_struct *dump_task = current;
1507 	const struct user_regset_view *view = task_user_regset_view(dump_task);
1508 	struct elf_thread_core_info *t;
1509 	struct elf_prpsinfo *psinfo;
1510 	struct core_thread *ct;
1511 	unsigned int i;
1512 
1513 	info->size = 0;
1514 	info->thread = NULL;
1515 
1516 	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1517 	fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1518 
1519 	if (psinfo == NULL)
1520 		return 0;
1521 
1522 	/*
1523 	 * Figure out how many notes we're going to need for each thread.
1524 	 */
1525 	info->thread_notes = 0;
1526 	for (i = 0; i < view->n; ++i)
1527 		if (view->regsets[i].core_note_type != 0)
1528 			++info->thread_notes;
1529 
1530 	/*
1531 	 * Sanity check.  We rely on regset 0 being NT_PRSTATUS,
1532 	 * since it is our one special case.
1533 	 */
1534 	if (unlikely(info->thread_notes == 0) ||
1535 	    unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1536 		WARN_ON(1);
1537 		return 0;
1538 	}
1539 
1540 	/*
1541 	 * Initialize the ELF file header.
1542 	 */
1543 	fill_elf_header(elf, phdrs,
1544 			view->e_machine, view->e_flags, view->ei_osabi);
1545 
1546 	/*
1547 	 * Allocate a structure for each thread.
1548 	 */
1549 	for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1550 		t = kzalloc(offsetof(struct elf_thread_core_info,
1551 				     notes[info->thread_notes]),
1552 			    GFP_KERNEL);
1553 		if (unlikely(!t))
1554 			return 0;
1555 
1556 		t->task = ct->task;
1557 		if (ct->task == dump_task || !info->thread) {
1558 			t->next = info->thread;
1559 			info->thread = t;
1560 		} else {
1561 			/*
1562 			 * Make sure to keep the original task at
1563 			 * the head of the list.
1564 			 */
1565 			t->next = info->thread->next;
1566 			info->thread->next = t;
1567 		}
1568 	}
1569 
1570 	/*
1571 	 * Now fill in each thread's information.
1572 	 */
1573 	for (t = info->thread; t != NULL; t = t->next)
1574 		if (!fill_thread_core_info(t, view, signr, &info->size))
1575 			return 0;
1576 
1577 	/*
1578 	 * Fill in the two process-wide notes.
1579 	 */
1580 	fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1581 	info->size += notesize(&info->psinfo);
1582 
1583 	fill_auxv_note(&info->auxv, current->mm);
1584 	info->size += notesize(&info->auxv);
1585 
1586 	return 1;
1587 }
1588 
1589 static size_t get_note_info_size(struct elf_note_info *info)
1590 {
1591 	return info->size;
1592 }
1593 
1594 /*
1595  * Write all the notes for each thread.  When writing the first thread, the
1596  * process-wide notes are interleaved after the first thread-specific note.
1597  */
1598 static int write_note_info(struct elf_note_info *info,
1599 			   struct file *file, loff_t *foffset)
1600 {
1601 	bool first = true;
1602 	struct elf_thread_core_info *t = info->thread;
1603 
1604 	do {
1605 		int i;
1606 
1607 		if (!writenote(&t->notes[0], file, foffset))
1608 			return 0;
1609 
1610 		if (first && !writenote(&info->psinfo, file, foffset))
1611 			return 0;
1612 		if (first && !writenote(&info->auxv, file, foffset))
1613 			return 0;
1614 
1615 		for (i = 1; i < info->thread_notes; ++i)
1616 			if (t->notes[i].data &&
1617 			    !writenote(&t->notes[i], file, foffset))
1618 				return 0;
1619 
1620 		first = false;
1621 		t = t->next;
1622 	} while (t);
1623 
1624 	return 1;
1625 }
1626 
1627 static void free_note_info(struct elf_note_info *info)
1628 {
1629 	struct elf_thread_core_info *threads = info->thread;
1630 	while (threads) {
1631 		unsigned int i;
1632 		struct elf_thread_core_info *t = threads;
1633 		threads = t->next;
1634 		WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1635 		for (i = 1; i < info->thread_notes; ++i)
1636 			kfree(t->notes[i].data);
1637 		kfree(t);
1638 	}
1639 	kfree(info->psinfo.data);
1640 }
1641 
1642 #else
1643 
1644 /* Here is the structure in which status of each thread is captured. */
1645 struct elf_thread_status
1646 {
1647 	struct list_head list;
1648 	struct elf_prstatus prstatus;	/* NT_PRSTATUS */
1649 	elf_fpregset_t fpu;		/* NT_PRFPREG */
1650 	struct task_struct *thread;
1651 #ifdef ELF_CORE_COPY_XFPREGS
1652 	elf_fpxregset_t xfpu;		/* ELF_CORE_XFPREG_TYPE */
1653 #endif
1654 	struct memelfnote notes[3];
1655 	int num_notes;
1656 };
1657 
1658 /*
1659  * In order to add the specific thread information for the elf file format,
1660  * we need to keep a linked list of every threads pr_status and then create
1661  * a single section for them in the final core file.
1662  */
1663 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1664 {
1665 	int sz = 0;
1666 	struct task_struct *p = t->thread;
1667 	t->num_notes = 0;
1668 
1669 	fill_prstatus(&t->prstatus, p, signr);
1670 	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1671 
1672 	fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1673 		  &(t->prstatus));
1674 	t->num_notes++;
1675 	sz += notesize(&t->notes[0]);
1676 
1677 	if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1678 								&t->fpu))) {
1679 		fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1680 			  &(t->fpu));
1681 		t->num_notes++;
1682 		sz += notesize(&t->notes[1]);
1683 	}
1684 
1685 #ifdef ELF_CORE_COPY_XFPREGS
1686 	if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1687 		fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1688 			  sizeof(t->xfpu), &t->xfpu);
1689 		t->num_notes++;
1690 		sz += notesize(&t->notes[2]);
1691 	}
1692 #endif
1693 	return sz;
1694 }
1695 
1696 struct elf_note_info {
1697 	struct memelfnote *notes;
1698 	struct elf_prstatus *prstatus;	/* NT_PRSTATUS */
1699 	struct elf_prpsinfo *psinfo;	/* NT_PRPSINFO */
1700 	struct list_head thread_list;
1701 	elf_fpregset_t *fpu;
1702 #ifdef ELF_CORE_COPY_XFPREGS
1703 	elf_fpxregset_t *xfpu;
1704 #endif
1705 	int thread_status_size;
1706 	int numnote;
1707 };
1708 
1709 static int fill_note_info(struct elfhdr *elf, int phdrs,
1710 			  struct elf_note_info *info,
1711 			  long signr, struct pt_regs *regs)
1712 {
1713 #define	NUM_NOTES	6
1714 	struct list_head *t;
1715 
1716 	info->notes = NULL;
1717 	info->prstatus = NULL;
1718 	info->psinfo = NULL;
1719 	info->fpu = NULL;
1720 #ifdef ELF_CORE_COPY_XFPREGS
1721 	info->xfpu = NULL;
1722 #endif
1723 	INIT_LIST_HEAD(&info->thread_list);
1724 
1725 	info->notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote),
1726 			      GFP_KERNEL);
1727 	if (!info->notes)
1728 		return 0;
1729 	info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1730 	if (!info->psinfo)
1731 		return 0;
1732 	info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1733 	if (!info->prstatus)
1734 		return 0;
1735 	info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1736 	if (!info->fpu)
1737 		return 0;
1738 #ifdef ELF_CORE_COPY_XFPREGS
1739 	info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1740 	if (!info->xfpu)
1741 		return 0;
1742 #endif
1743 
1744 	info->thread_status_size = 0;
1745 	if (signr) {
1746 		struct core_thread *ct;
1747 		struct elf_thread_status *ets;
1748 
1749 		for (ct = current->mm->core_state->dumper.next;
1750 						ct; ct = ct->next) {
1751 			ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1752 			if (!ets)
1753 				return 0;
1754 
1755 			ets->thread = ct->task;
1756 			list_add(&ets->list, &info->thread_list);
1757 		}
1758 
1759 		list_for_each(t, &info->thread_list) {
1760 			int sz;
1761 
1762 			ets = list_entry(t, struct elf_thread_status, list);
1763 			sz = elf_dump_thread_status(signr, ets);
1764 			info->thread_status_size += sz;
1765 		}
1766 	}
1767 	/* now collect the dump for the current task */
1768 	memset(info->prstatus, 0, sizeof(*info->prstatus));
1769 	fill_prstatus(info->prstatus, current, signr);
1770 	elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1771 
1772 	/* Set up header */
1773 	fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS, ELF_OSABI);
1774 
1775 	/*
1776 	 * Set up the notes in similar form to SVR4 core dumps made
1777 	 * with info from their /proc.
1778 	 */
1779 
1780 	fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1781 		  sizeof(*info->prstatus), info->prstatus);
1782 	fill_psinfo(info->psinfo, current->group_leader, current->mm);
1783 	fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1784 		  sizeof(*info->psinfo), info->psinfo);
1785 
1786 	info->numnote = 2;
1787 
1788 	fill_auxv_note(&info->notes[info->numnote++], current->mm);
1789 
1790 	/* Try to dump the FPU. */
1791 	info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1792 							       info->fpu);
1793 	if (info->prstatus->pr_fpvalid)
1794 		fill_note(info->notes + info->numnote++,
1795 			  "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1796 #ifdef ELF_CORE_COPY_XFPREGS
1797 	if (elf_core_copy_task_xfpregs(current, info->xfpu))
1798 		fill_note(info->notes + info->numnote++,
1799 			  "LINUX", ELF_CORE_XFPREG_TYPE,
1800 			  sizeof(*info->xfpu), info->xfpu);
1801 #endif
1802 
1803 	return 1;
1804 
1805 #undef NUM_NOTES
1806 }
1807 
1808 static size_t get_note_info_size(struct elf_note_info *info)
1809 {
1810 	int sz = 0;
1811 	int i;
1812 
1813 	for (i = 0; i < info->numnote; i++)
1814 		sz += notesize(info->notes + i);
1815 
1816 	sz += info->thread_status_size;
1817 
1818 	return sz;
1819 }
1820 
1821 static int write_note_info(struct elf_note_info *info,
1822 			   struct file *file, loff_t *foffset)
1823 {
1824 	int i;
1825 	struct list_head *t;
1826 
1827 	for (i = 0; i < info->numnote; i++)
1828 		if (!writenote(info->notes + i, file, foffset))
1829 			return 0;
1830 
1831 	/* write out the thread status notes section */
1832 	list_for_each(t, &info->thread_list) {
1833 		struct elf_thread_status *tmp =
1834 				list_entry(t, struct elf_thread_status, list);
1835 
1836 		for (i = 0; i < tmp->num_notes; i++)
1837 			if (!writenote(&tmp->notes[i], file, foffset))
1838 				return 0;
1839 	}
1840 
1841 	return 1;
1842 }
1843 
1844 static void free_note_info(struct elf_note_info *info)
1845 {
1846 	while (!list_empty(&info->thread_list)) {
1847 		struct list_head *tmp = info->thread_list.next;
1848 		list_del(tmp);
1849 		kfree(list_entry(tmp, struct elf_thread_status, list));
1850 	}
1851 
1852 	kfree(info->prstatus);
1853 	kfree(info->psinfo);
1854 	kfree(info->notes);
1855 	kfree(info->fpu);
1856 #ifdef ELF_CORE_COPY_XFPREGS
1857 	kfree(info->xfpu);
1858 #endif
1859 }
1860 
1861 #endif
1862 
1863 static struct vm_area_struct *first_vma(struct task_struct *tsk,
1864 					struct vm_area_struct *gate_vma)
1865 {
1866 	struct vm_area_struct *ret = tsk->mm->mmap;
1867 
1868 	if (ret)
1869 		return ret;
1870 	return gate_vma;
1871 }
1872 /*
1873  * Helper function for iterating across a vma list.  It ensures that the caller
1874  * will visit `gate_vma' prior to terminating the search.
1875  */
1876 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1877 					struct vm_area_struct *gate_vma)
1878 {
1879 	struct vm_area_struct *ret;
1880 
1881 	ret = this_vma->vm_next;
1882 	if (ret)
1883 		return ret;
1884 	if (this_vma == gate_vma)
1885 		return NULL;
1886 	return gate_vma;
1887 }
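/*
 * Together these implement the canonical walk used twice by
 * elf_core_dump() below:
 *
 *	for (vma = first_vma(current, gate_vma); vma != NULL;
 *			vma = next_vma(vma, gate_vma))
 *		...
 *
 * which visits every vma on mm->mmap in order and then yields the gate
 * vma exactly once, if there is one.
 */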
1888 
1889 /*
1890  * Actual dumper
1891  *
1892  * This is a two-pass process: first we compute the file offsets of the
1893  * headers, notes and segments, and then everything is actually written
1894  * out.  If we run out of core limit we just truncate.
1895  */
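/*
 * A sketch of the resulting core file layout, assuming no arch-specific
 * ELF_CORE_EXTRA_* contributions:
 *
 *	ELF header
 *	PT_NOTE program header
 *	one PT_LOAD program header per vma (including the gate vma, if any)
 *	note data (NT_PRSTATUS, NT_PRPSINFO, NT_AUXV, NT_PRFPREG, ...,
 *		followed by the per-thread status notes)
 *	padding up to an ELF_EXEC_PAGESIZE boundary (dataoff)
 *	the vma contents, dumped a page at a time
 */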
1896 static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
1897 {
1898 	int has_dumped = 0;
1899 	mm_segment_t fs;
1900 	int segs;
1901 	size_t size = 0;
1902 	struct vm_area_struct *vma, *gate_vma;
1903 	struct elfhdr *elf = NULL;
1904 	loff_t offset = 0, dataoff, foffset;
1905 	unsigned long mm_flags;
1906 	struct elf_note_info info;
1907 
1908 	/*
1909 	 * We no longer stop all VM operations.
1910 	 *
1911 	 * This is because those processes that could possibly change map_count
1912 	 * or the mmap / vma pages are now blocked in do_exit on current
1913 	 * finishing this core dump.
1914 	 *
1915 	 * Only ptrace can touch these memory addresses, but it doesn't change
1916 	 * the map_count or the pages allocated, so there is no possibility of
1917 	 * crashing while dumping the mm->vm_next areas to the core file.
1918 	 */
1919 
1920 	/* alloc memory for large data structures: too large to be on stack */
1921 	elf = kmalloc(sizeof(*elf), GFP_KERNEL);
1922 	if (!elf)
1923 		goto out;
1924 
1925 	segs = current->mm->map_count;
1926 #ifdef ELF_CORE_EXTRA_PHDRS
1927 	segs += ELF_CORE_EXTRA_PHDRS;
1928 #endif
1929 
1930 	gate_vma = get_gate_vma(current);
1931 	if (gate_vma != NULL)
1932 		segs++;
1933 
1934 	/*
1935 	 * Collect all the non-memory information about the process for the
1936 	 * notes.  This also sets up the file header.
1937 	 */
1938 	if (!fill_note_info(elf, segs + 1, /* including notes section */
1939 			    &info, signr, regs))
1940 		goto cleanup;
1941 
1942 	has_dumped = 1;
1943 	current->flags |= PF_DUMPCORE;
1944 
1945 	fs = get_fs();
1946 	set_fs(KERNEL_DS);
1947 
1948 	DUMP_WRITE(elf, sizeof(*elf));
1949 	offset += sizeof(*elf);				/* Elf header */
1950 	offset += (segs + 1) * sizeof(struct elf_phdr); /* Program headers */
1951 	foffset = offset;
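	/*
	 * From here on, offset tracks where each piece is planned to land
	 * in the file, while foffset tracks how far the note data has
	 * actually been written; DUMP_SEEK(dataoff - foffset) below
	 * reconciles the two before the memory segments are dumped.
	 */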
1952 
1953 	/* Write notes phdr entry */
1954 	{
1955 		struct elf_phdr phdr;
1956 		size_t sz = get_note_info_size(&info);
1957 
1958 		sz += elf_coredump_extra_notes_size();
1959 
1960 		fill_elf_note_phdr(&phdr, sz, offset);
1961 		offset += sz;
1962 		DUMP_WRITE(&phdr, sizeof(phdr));
1963 	}
1964 
1965 	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
1966 
1967 	/*
1968 	 * We must use the same mm->flags while dumping core to avoid
1969 	 * inconsistency between the program headers and bodies, otherwise an
1970 	 * unusable core file can be generated.
1971 	 */
1972 	mm_flags = current->mm->flags;
1973 
1974 	/* Write program headers for segments dump */
1975 	for (vma = first_vma(current, gate_vma); vma != NULL;
1976 			vma = next_vma(vma, gate_vma)) {
1977 		struct elf_phdr phdr;
1978 
1979 		phdr.p_type = PT_LOAD;
1980 		phdr.p_offset = offset;
1981 		phdr.p_vaddr = vma->vm_start;
1982 		phdr.p_paddr = 0;
1983 		phdr.p_filesz = vma_dump_size(vma, mm_flags);
1984 		phdr.p_memsz = vma->vm_end - vma->vm_start;
1985 		offset += phdr.p_filesz;
1986 		phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
1987 		if (vma->vm_flags & VM_WRITE)
1988 			phdr.p_flags |= PF_W;
1989 		if (vma->vm_flags & VM_EXEC)
1990 			phdr.p_flags |= PF_X;
1991 		phdr.p_align = ELF_EXEC_PAGESIZE;
1992 
1993 		DUMP_WRITE(&phdr, sizeof(phdr));
1994 	}
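	/*
	 * p_filesz may be smaller than p_memsz above: vma_dump_size() can
	 * decide, based on mm_flags, to omit some or all of a vma's
	 * contents, while the header still describes the full mapping so
	 * debuggers can see the address range without the data.
	 */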
1995 
1996 #ifdef ELF_CORE_WRITE_EXTRA_PHDRS
1997 	ELF_CORE_WRITE_EXTRA_PHDRS;
1998 #endif
1999 
2000 	/* Write out the notes section */
2001 	if (!write_note_info(&info, file, &foffset))
2002 		goto end_coredump;
2003 
2004 	if (elf_coredump_extra_notes_write(file, &foffset))
2005 		goto end_coredump;
2006 
2007 	/* Align to page */
2008 	DUMP_SEEK(dataoff - foffset);
2009 
2010 	for (vma = first_vma(current, gate_vma); vma != NULL;
2011 			vma = next_vma(vma, gate_vma)) {
2012 		unsigned long addr;
2013 		unsigned long end;
2014 
2015 		end = vma->vm_start + vma_dump_size(vma, mm_flags);
2016 
2017 		for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2018 			struct page *page;
2019 			struct vm_area_struct *tmp_vma;
2020 
2021 			if (get_user_pages(current, current->mm, addr, 1, 0, 1,
2022 						&page, &tmp_vma) <= 0) {
2023 				DUMP_SEEK(PAGE_SIZE);
2024 			} else {
2025 				if (page == ZERO_PAGE(0)) {
2026 					if (!dump_seek(file, PAGE_SIZE)) {
2027 						page_cache_release(page);
2028 						goto end_coredump;
2029 					}
2030 				} else {
2031 					void *kaddr;
2032 					flush_cache_page(tmp_vma, addr,
2033 							 page_to_pfn(page));
2034 					kaddr = kmap(page);
2035 					if ((size += PAGE_SIZE) > limit ||
2036 					    !dump_write(file, kaddr,
2037 					    PAGE_SIZE)) {
2038 						kunmap(page);
2039 						page_cache_release(page);
2040 						goto end_coredump;
2041 					}
2042 					kunmap(page);
2043 				}
2044 				page_cache_release(page);
2045 			}
2046 		}
2047 	}
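	/*
	 * Pages that could not be fetched, and pages mapped from
	 * ZERO_PAGE, are skipped with a seek rather than written, so
	 * untouched anonymous memory typically ends up as a hole (or as
	 * zero fill, if the target cannot seek) in the core file.
	 */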
2048 
2049 #ifdef ELF_CORE_WRITE_EXTRA_DATA
2050 	ELF_CORE_WRITE_EXTRA_DATA;
2051 #endif
2052 
2053 end_coredump:
2054 	set_fs(fs);
2055 
2056 cleanup:
2057 	free_note_info(&info);
2058 	kfree(elf);
2059 out:
2060 	return has_dumped;
2061 }
2062 
2063 #endif		/* USE_ELF_CORE_DUMP */
2064 
2065 static int __init init_elf_binfmt(void)
2066 {
2067 	return register_binfmt(&elf_format);
2068 }
2069 
2070 static void __exit exit_elf_binfmt(void)
2071 {
2072 	/* Remove the ELF loader. */
2073 	unregister_binfmt(&elf_format);
2074 }
2075 
2076 core_initcall(init_elf_binfmt);
2077 module_exit(exit_elf_binfmt);
2078 MODULE_LICENSE("GPL");
2079