xref: /linux/fs/binfmt_elf.c (revision b8bb76713ec50df2f11efee386e16f93d51e1076)
1 /*
2  * linux/fs/binfmt_elf.c
3  *
4  * These are the functions used to load ELF format executables as used
5  * on SVr4 machines.  Information on the format may be found in the book
6  * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
7  * Tools".
8  *
9  * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
10  */
11 
12 #include <linux/module.h>
13 #include <linux/kernel.h>
14 #include <linux/fs.h>
15 #include <linux/stat.h>
16 #include <linux/time.h>
17 #include <linux/mm.h>
18 #include <linux/mman.h>
19 #include <linux/errno.h>
20 #include <linux/signal.h>
21 #include <linux/binfmts.h>
22 #include <linux/string.h>
23 #include <linux/file.h>
24 #include <linux/fcntl.h>
25 #include <linux/ptrace.h>
26 #include <linux/slab.h>
27 #include <linux/shm.h>
28 #include <linux/personality.h>
29 #include <linux/elfcore.h>
30 #include <linux/init.h>
31 #include <linux/highuid.h>
32 #include <linux/smp.h>
33 #include <linux/compiler.h>
34 #include <linux/highmem.h>
35 #include <linux/pagemap.h>
36 #include <linux/security.h>
37 #include <linux/syscalls.h>
38 #include <linux/random.h>
39 #include <linux/elf.h>
40 #include <linux/utsname.h>
41 #include <asm/uaccess.h>
42 #include <asm/param.h>
43 #include <asm/page.h>
44 
45 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
46 static int load_elf_library(struct file *);
47 static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
48 				int, int, unsigned long);
49 
50 /*
51  * If we don't support core dumping, then supply a NULL so we
52  * don't even try.
53  */
54 #if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
55 static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
56 #else
57 #define elf_core_dump	NULL
58 #endif
59 
60 #if ELF_EXEC_PAGESIZE > PAGE_SIZE
61 #define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
62 #else
63 #define ELF_MIN_ALIGN	PAGE_SIZE
64 #endif
65 
66 #ifndef ELF_CORE_EFLAGS
67 #define ELF_CORE_EFLAGS	0
68 #endif
69 
70 #define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
71 #define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
72 #define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
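/*
 * For example, with ELF_MIN_ALIGN == 0x1000 an address of 0x1234 gives
 * ELF_PAGESTART(0x1234)  == 0x1000  (rounded down to the page),
 * ELF_PAGEOFFSET(0x1234) == 0x234   (offset within the page), and
 * ELF_PAGEALIGN(0x1234)  == 0x2000  (rounded up to the next page).
 */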
73 
74 static struct linux_binfmt elf_format = {
75 		.module		= THIS_MODULE,
76 		.load_binary	= load_elf_binary,
77 		.load_shlib	= load_elf_library,
78 		.core_dump	= elf_core_dump,
79 		.min_coredump	= ELF_EXEC_PAGESIZE,
80 		.hasvdso	= 1
81 };
82 
83 #define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
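/*
 * Note that do_mmap()/do_brk() report failure by returning a negative
 * errno value which, seen as an unsigned long, is far above TASK_SIZE,
 * so BAD_ADDR() catches both mapping failures and addresses at or
 * beyond the task's address-space limit.
 */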
84 
85 static int set_brk(unsigned long start, unsigned long end)
86 {
87 	start = ELF_PAGEALIGN(start);
88 	end = ELF_PAGEALIGN(end);
89 	if (end > start) {
90 		unsigned long addr;
91 		down_write(&current->mm->mmap_sem);
92 		addr = do_brk(start, end - start);
93 		up_write(&current->mm->mmap_sem);
94 		if (BAD_ADDR(addr))
95 			return addr;
96 	}
97 	current->mm->start_brk = current->mm->brk = end;
98 	return 0;
99 }
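
/*
 * For example (assuming ELF_MIN_ALIGN == 0x1000), set_brk(0x0804a123,
 * 0x0804b800) page-aligns both ends, calls do_brk(0x0804b000, 0x1000)
 * to map the anonymous page in between, and leaves start_brk == brk ==
 * 0x0804c000 so the heap starts just past the bss.
 */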
100 
101 /* We need to explicitly zero any fractional pages
102    after the data section (i.e. bss).  This would
103    contain the junk from the file that should not
104    be in memory.
105  */
106 static int padzero(unsigned long elf_bss)
107 {
108 	unsigned long nbyte;
109 
110 	nbyte = ELF_PAGEOFFSET(elf_bss);
111 	if (nbyte) {
112 		nbyte = ELF_MIN_ALIGN - nbyte;
113 		if (clear_user((void __user *) elf_bss, nbyte))
114 			return -EFAULT;
115 	}
116 	return 0;
117 }
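
/*
 * For example, with ELF_MIN_ALIGN == 0x1000, padzero(0x0804a123) clears
 * the 0xedd bytes from 0x0804a123 up to the page boundary at 0x0804b000,
 * so junk from the last file-backed page never shows up past the end of
 * the data segment.
 */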
118 
119 /* Let's use some macros to make this stack manipulation a little clearer */
120 #ifdef CONFIG_STACK_GROWSUP
121 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
122 #define STACK_ROUND(sp, items) \
123 	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
124 #define STACK_ALLOC(sp, len) ({ \
125 	elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
126 	old_sp; })
127 #else
128 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
129 #define STACK_ROUND(sp, items) \
130 	(((unsigned long) (sp - items)) &~ 15UL)
131 #define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
132 #endif
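
/*
 * On the usual grows-down stack STACK_ALLOC() moves sp down by len and
 * returns the new (lower) address, so the copied object lives at the
 * returned pointer; on grows-up stacks it returns the old sp and then
 * advances past the object.  STACK_ROUND() keeps the final pointer
 * 16-byte aligned in either case.
 */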
133 
134 #ifndef ELF_BASE_PLATFORM
135 /*
136  * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
137  * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
138  * will be copied to the user stack in the same manner as AT_PLATFORM.
139  */
140 #define ELF_BASE_PLATFORM NULL
141 #endif
142 
143 static int
144 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
145 		unsigned long load_addr, unsigned long interp_load_addr)
146 {
147 	unsigned long p = bprm->p;
148 	int argc = bprm->argc;
149 	int envc = bprm->envc;
150 	elf_addr_t __user *argv;
151 	elf_addr_t __user *envp;
152 	elf_addr_t __user *sp;
153 	elf_addr_t __user *u_platform;
154 	elf_addr_t __user *u_base_platform;
155 	elf_addr_t __user *u_rand_bytes;
156 	const char *k_platform = ELF_PLATFORM;
157 	const char *k_base_platform = ELF_BASE_PLATFORM;
158 	unsigned char k_rand_bytes[16];
159 	int items;
160 	elf_addr_t *elf_info;
161 	int ei_index = 0;
162 	const struct cred *cred = current_cred();
163 	struct vm_area_struct *vma;
164 
165 	/*
166 	 * In some cases (e.g. Hyper-Threading), we want to avoid L1
167 	 * evictions by the processes running on the same package. One
168 	 * thing we can do is to shuffle the initial stack for them.
169 	 */
170 
171 	p = arch_align_stack(p);
172 
173 	/*
174 	 * If this architecture has a platform capability string, copy it
175 	 * to userspace.  In some cases (Sparc), this info is impossible
176 	 * for userspace to get any other way, in others (i386) it is
177 	 * merely difficult.
178 	 */
179 	u_platform = NULL;
180 	if (k_platform) {
181 		size_t len = strlen(k_platform) + 1;
182 
183 		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
184 		if (__copy_to_user(u_platform, k_platform, len))
185 			return -EFAULT;
186 	}
187 
188 	/*
189 	 * If this architecture has a "base" platform capability
190 	 * string, copy it to userspace.
191 	 */
192 	u_base_platform = NULL;
193 	if (k_base_platform) {
194 		size_t len = strlen(k_base_platform) + 1;
195 
196 		u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
197 		if (__copy_to_user(u_base_platform, k_base_platform, len))
198 			return -EFAULT;
199 	}
200 
201 	/*
202 	 * Generate 16 random bytes for userspace PRNG seeding.
203 	 */
204 	get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
205 	u_rand_bytes = (elf_addr_t __user *)
206 		       STACK_ALLOC(p, sizeof(k_rand_bytes));
207 	if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
208 		return -EFAULT;
209 
210 	/* Create the ELF interpreter info */
211 	elf_info = (elf_addr_t *)current->mm->saved_auxv;
212 	/* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
213 #define NEW_AUX_ENT(id, val) \
214 	do { \
215 		elf_info[ei_index++] = id; \
216 		elf_info[ei_index++] = val; \
217 	} while (0)
218 
219 #ifdef ARCH_DLINFO
220 	/*
221 	 * ARCH_DLINFO must come first so PPC can do its special alignment of
222 	 * AUXV.
223 	 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
224 	 * ARCH_DLINFO changes
225 	 */
226 	ARCH_DLINFO;
227 #endif
228 	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
229 	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
230 	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
231 	NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
232 	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
233 	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
234 	NEW_AUX_ENT(AT_BASE, interp_load_addr);
235 	NEW_AUX_ENT(AT_FLAGS, 0);
236 	NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
237 	NEW_AUX_ENT(AT_UID, cred->uid);
238 	NEW_AUX_ENT(AT_EUID, cred->euid);
239 	NEW_AUX_ENT(AT_GID, cred->gid);
240 	NEW_AUX_ENT(AT_EGID, cred->egid);
241 	NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
242 	NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
243 	NEW_AUX_ENT(AT_EXECFN, bprm->exec);
244 	if (k_platform) {
245 		NEW_AUX_ENT(AT_PLATFORM,
246 			    (elf_addr_t)(unsigned long)u_platform);
247 	}
248 	if (k_base_platform) {
249 		NEW_AUX_ENT(AT_BASE_PLATFORM,
250 			    (elf_addr_t)(unsigned long)u_base_platform);
251 	}
252 	if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
253 		NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
254 	}
255 #undef NEW_AUX_ENT
256 	/* AT_NULL is zero; clear the rest too */
257 	memset(&elf_info[ei_index], 0,
258 	       sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
259 
260 	/* And advance past the AT_NULL entry.  */
261 	ei_index += 2;
262 
263 	sp = STACK_ADD(p, ei_index);
264 
265 	items = (argc + 1) + (envc + 1) + 1;
266 	bprm->p = STACK_ROUND(sp, items);
267 
268 	/* Point sp at the lowest address on the stack */
269 #ifdef CONFIG_STACK_GROWSUP
270 	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
271 	bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
272 #else
273 	sp = (elf_addr_t __user *)bprm->p;
274 #endif
275 
276 
277 	/*
278 	 * Grow the stack manually; some architectures have a limit on how
279 	 * far ahead a user-space access may be in order to grow the stack.
280 	 */
281 	vma = find_extend_vma(current->mm, bprm->p);
282 	if (!vma)
283 		return -EFAULT;
284 
285 	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
286 	if (__put_user(argc, sp++))
287 		return -EFAULT;
288 	argv = sp;
289 	envp = argv + argc + 1;
290 
291 	/* Populate argv and envp */
292 	p = current->mm->arg_end = current->mm->arg_start;
293 	while (argc-- > 0) {
294 		size_t len;
295 		if (__put_user((elf_addr_t)p, argv++))
296 			return -EFAULT;
297 		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
298 		if (!len || len > MAX_ARG_STRLEN)
299 			return -EINVAL;
300 		p += len;
301 	}
302 	if (__put_user(0, argv))
303 		return -EFAULT;
304 	current->mm->arg_end = current->mm->env_start = p;
305 	while (envc-- > 0) {
306 		size_t len;
307 		if (__put_user((elf_addr_t)p, envp++))
308 			return -EFAULT;
309 		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
310 		if (!len || len > MAX_ARG_STRLEN)
311 			return -EINVAL;
312 		p += len;
313 	}
314 	if (__put_user(0, envp))
315 		return -EFAULT;
316 	current->mm->env_end = p;
317 
318 	/* Put the elf_info on the stack in the right place.  */
319 	sp = (elf_addr_t __user *)envp + 1;
320 	if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
321 		return -EFAULT;
322 	return 0;
323 }
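
/*
 * Roughly, for the common grows-down stack, the table built above ends
 * up laid out from the final stack pointer (bprm->p) towards higher
 * addresses as:
 *
 *	argc
 *	argv[0] .. argv[argc-1], NULL
 *	envp[0] .. envp[envc-1], NULL
 *	auxv id/value pairs, terminated by an AT_NULL entry
 *	(alignment gap left by STACK_ROUND)
 *	AT_RANDOM bytes and the platform capability strings
 *	argument and environment strings copied earlier by copy_strings()
 */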
324 
325 #ifndef elf_map
326 
327 static unsigned long elf_map(struct file *filep, unsigned long addr,
328 		struct elf_phdr *eppnt, int prot, int type,
329 		unsigned long total_size)
330 {
331 	unsigned long map_addr;
332 	unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
333 	unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
334 	addr = ELF_PAGESTART(addr);
335 	size = ELF_PAGEALIGN(size);
336 
337 	/* mmap() will return -EINVAL if given a zero size, but a
338 	 * segment with zero filesize is perfectly valid */
339 	if (!size)
340 		return addr;
341 
342 	down_write(&current->mm->mmap_sem);
343 	/*
344 	* total_size is the size of the ELF (interpreter) image.
345 	* The _first_ mmap needs to know the full size, otherwise
346 	* randomization might put this image into an overlapping
347 	* position with the ELF binary image. (since size < total_size)
348 	* So we first map the 'big' image - and unmap the remainder at
349 	* the end (this unmap is needed for ELF images with holes).
350 	*/
351 	if (total_size) {
352 		total_size = ELF_PAGEALIGN(total_size);
353 		map_addr = do_mmap(filep, addr, total_size, prot, type, off);
354 		if (!BAD_ADDR(map_addr))
355 			do_munmap(current->mm, map_addr+size, total_size-size);
356 	} else
357 		map_addr = do_mmap(filep, addr, size, prot, type, off);
358 
359 	up_write(&current->mm->mmap_sem);
360 	return(map_addr);
361 }
362 
363 #endif /* !elf_map */
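
/*
 * For example, when load_elf_interp() below maps a two-segment
 * interpreter it passes total_size only on the first elf_map() call:
 * that call reserves the whole page-aligned span and immediately unmaps
 * everything past the first segment, and the second segment is then
 * mapped MAP_FIXED into the hole.  Later calls pass total_size == 0 and
 * map just their own segment.
 */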
364 
365 static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
366 {
367 	int i, first_idx = -1, last_idx = -1;
368 
369 	for (i = 0; i < nr; i++) {
370 		if (cmds[i].p_type == PT_LOAD) {
371 			last_idx = i;
372 			if (first_idx == -1)
373 				first_idx = i;
374 		}
375 	}
376 	if (first_idx == -1)
377 		return 0;
378 
379 	return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
380 				ELF_PAGESTART(cmds[first_idx].p_vaddr);
381 }
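
/*
 * For example, PT_LOAD segments at p_vaddr 0x400000 (p_memsz 0x800) and
 * p_vaddr 0x600e00 (p_memsz 0x430) yield 0x601230 - 0x400000 = 0x201230:
 * the span from the page containing the first segment to the end of the
 * last one.
 */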
382 
383 
384 /* This is much more generalized than the library routine read function,
385    so we keep this separate.  Technically the library read function
386    is only provided so that we can read a.out libraries that have
387    an ELF header */
388 
389 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
390 		struct file *interpreter, unsigned long *interp_map_addr,
391 		unsigned long no_base)
392 {
393 	struct elf_phdr *elf_phdata;
394 	struct elf_phdr *eppnt;
395 	unsigned long load_addr = 0;
396 	int load_addr_set = 0;
397 	unsigned long last_bss = 0, elf_bss = 0;
398 	unsigned long error = ~0UL;
399 	unsigned long total_size;
400 	int retval, i, size;
401 
402 	/* First of all, some simple consistency checks */
403 	if (interp_elf_ex->e_type != ET_EXEC &&
404 	    interp_elf_ex->e_type != ET_DYN)
405 		goto out;
406 	if (!elf_check_arch(interp_elf_ex))
407 		goto out;
408 	if (!interpreter->f_op || !interpreter->f_op->mmap)
409 		goto out;
410 
411 	/*
412 	 * If the size of this structure has changed, then punt, since
413 	 * we will be doing the wrong thing.
414 	 */
415 	if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
416 		goto out;
417 	if (interp_elf_ex->e_phnum < 1 ||
418 		interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
419 		goto out;
420 
421 	/* Now read in all of the header information */
422 	size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
423 	if (size > ELF_MIN_ALIGN)
424 		goto out;
425 	elf_phdata = kmalloc(size, GFP_KERNEL);
426 	if (!elf_phdata)
427 		goto out;
428 
429 	retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
430 			     (char *)elf_phdata,size);
431 	error = -EIO;
432 	if (retval != size) {
433 		if (retval < 0)
434 			error = retval;
435 		goto out_close;
436 	}
437 
438 	total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
439 	if (!total_size) {
440 		error = -EINVAL;
441 		goto out_close;
442 	}
443 
444 	eppnt = elf_phdata;
445 	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
446 		if (eppnt->p_type == PT_LOAD) {
447 			int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
448 			int elf_prot = 0;
449 			unsigned long vaddr = 0;
450 			unsigned long k, map_addr;
451 
452 			if (eppnt->p_flags & PF_R)
453 				elf_prot = PROT_READ;
454 			if (eppnt->p_flags & PF_W)
455 				elf_prot |= PROT_WRITE;
456 			if (eppnt->p_flags & PF_X)
457 				elf_prot |= PROT_EXEC;
458 			vaddr = eppnt->p_vaddr;
459 			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
460 				elf_type |= MAP_FIXED;
461 			else if (no_base && interp_elf_ex->e_type == ET_DYN)
462 				load_addr = -vaddr;
463 
464 			map_addr = elf_map(interpreter, load_addr + vaddr,
465 					eppnt, elf_prot, elf_type, total_size);
466 			total_size = 0;
467 			if (!*interp_map_addr)
468 				*interp_map_addr = map_addr;
469 			error = map_addr;
470 			if (BAD_ADDR(map_addr))
471 				goto out_close;
472 
473 			if (!load_addr_set &&
474 			    interp_elf_ex->e_type == ET_DYN) {
475 				load_addr = map_addr - ELF_PAGESTART(vaddr);
476 				load_addr_set = 1;
477 			}
478 
479 			/*
480 			 * Check to see if the section's size will overflow the
481 			 * allowed task size. Note that p_filesz must always be
482 			 * <= p_memsz so it's only necessary to check p_memsz.
483 			 */
484 			k = load_addr + eppnt->p_vaddr;
485 			if (BAD_ADDR(k) ||
486 			    eppnt->p_filesz > eppnt->p_memsz ||
487 			    eppnt->p_memsz > TASK_SIZE ||
488 			    TASK_SIZE - eppnt->p_memsz < k) {
489 				error = -ENOMEM;
490 				goto out_close;
491 			}
492 
493 			/*
494 			 * Find the end of the file mapping for this phdr, and
495 			 * keep track of the largest address we see for this.
496 			 */
497 			k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
498 			if (k > elf_bss)
499 				elf_bss = k;
500 
501 			/*
502 			 * Do the same thing for the memory mapping - between
503 			 * elf_bss and last_bss is the bss section.
504 			 */
505 			k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
506 			if (k > last_bss)
507 				last_bss = k;
508 		}
509 	}
510 
511 	/*
512 	 * Now fill out the bss section.  First pad the last page up
513 	 * to the page boundary, and then perform a mmap to make sure
514 	 * that there are zero-mapped pages up to and including the
515 	 * last bss page.
516 	 */
517 	if (padzero(elf_bss)) {
518 		error = -EFAULT;
519 		goto out_close;
520 	}
521 
522 	/* What we have mapped so far */
523 	elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
524 
525 	/* Map the last of the bss segment */
526 	if (last_bss > elf_bss) {
527 		down_write(&current->mm->mmap_sem);
528 		error = do_brk(elf_bss, last_bss - elf_bss);
529 		up_write(&current->mm->mmap_sem);
530 		if (BAD_ADDR(error))
531 			goto out_close;
532 	}
533 
534 	error = load_addr;
535 
536 out_close:
537 	kfree(elf_phdata);
538 out:
539 	return error;
540 }
541 
542 /*
543  * These are the functions used to load ELF style executables and shared
544  * libraries.  There is no binary dependent code anywhere else.
545  */
546 
547 #define INTERPRETER_NONE 0
548 #define INTERPRETER_ELF 2
549 
550 #ifndef STACK_RND_MASK
551 #define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))	/* 8MB of VA */
552 #endif
553 
554 static unsigned long randomize_stack_top(unsigned long stack_top)
555 {
556 	unsigned int random_variable = 0;
557 
558 	if ((current->flags & PF_RANDOMIZE) &&
559 		!(current->personality & ADDR_NO_RANDOMIZE)) {
560 		random_variable = get_random_int() & STACK_RND_MASK;
561 		random_variable <<= PAGE_SHIFT;
562 	}
563 #ifdef CONFIG_STACK_GROWSUP
564 	return PAGE_ALIGN(stack_top) + random_variable;
565 #else
566 	return PAGE_ALIGN(stack_top) - random_variable;
567 #endif
568 }
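
/*
 * With the default STACK_RND_MASK of 0x7ff and 4K pages the offset above
 * is at most 0x7ff << 12 == 0x7ff000 bytes, i.e. just under 8MB of VA,
 * and it is skipped entirely when PF_RANDOMIZE is clear or the task has
 * the ADDR_NO_RANDOMIZE personality.
 */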
569 
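/*
 * In outline, load_elf_binary() validates the ELF and program headers,
 * reads any PT_INTERP path, flushes the old image, maps each PT_LOAD
 * segment (choosing a load bias for ET_DYN objects), sets up the
 * bss/brk, loads the interpreter if there is one, builds the initial
 * stack with create_elf_tables() and finally calls start_thread() at
 * the chosen entry point.
 */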
570 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
571 {
572 	struct file *interpreter = NULL; /* to shut gcc up */
573 	unsigned long load_addr = 0, load_bias = 0;
574 	int load_addr_set = 0;
575 	char * elf_interpreter = NULL;
576 	unsigned long error;
577 	struct elf_phdr *elf_ppnt, *elf_phdata;
578 	unsigned long elf_bss, elf_brk;
579 	int elf_exec_fileno;
580 	int retval, i;
581 	unsigned int size;
582 	unsigned long elf_entry;
583 	unsigned long interp_load_addr = 0;
584 	unsigned long start_code, end_code, start_data, end_data;
585 	unsigned long reloc_func_desc = 0;
586 	int executable_stack = EXSTACK_DEFAULT;
587 	unsigned long def_flags = 0;
588 	struct {
589 		struct elfhdr elf_ex;
590 		struct elfhdr interp_elf_ex;
591 	} *loc;
592 
593 	loc = kmalloc(sizeof(*loc), GFP_KERNEL);
594 	if (!loc) {
595 		retval = -ENOMEM;
596 		goto out_ret;
597 	}
598 
599 	/* Get the exec-header */
600 	loc->elf_ex = *((struct elfhdr *)bprm->buf);
601 
602 	retval = -ENOEXEC;
603 	/* First of all, some simple consistency checks */
604 	if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
605 		goto out;
606 
607 	if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
608 		goto out;
609 	if (!elf_check_arch(&loc->elf_ex))
610 		goto out;
611 	if (!bprm->file->f_op||!bprm->file->f_op->mmap)
612 		goto out;
613 
614 	/* Now read in all of the header information */
615 	if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
616 		goto out;
617 	if (loc->elf_ex.e_phnum < 1 ||
618 		loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
619 		goto out;
620 	size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
621 	retval = -ENOMEM;
622 	elf_phdata = kmalloc(size, GFP_KERNEL);
623 	if (!elf_phdata)
624 		goto out;
625 
626 	retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
627 			     (char *)elf_phdata, size);
628 	if (retval != size) {
629 		if (retval >= 0)
630 			retval = -EIO;
631 		goto out_free_ph;
632 	}
633 
634 	retval = get_unused_fd();
635 	if (retval < 0)
636 		goto out_free_ph;
637 	get_file(bprm->file);
638 	fd_install(elf_exec_fileno = retval, bprm->file);
639 
640 	elf_ppnt = elf_phdata;
641 	elf_bss = 0;
642 	elf_brk = 0;
643 
644 	start_code = ~0UL;
645 	end_code = 0;
646 	start_data = 0;
647 	end_data = 0;
648 
649 	for (i = 0; i < loc->elf_ex.e_phnum; i++) {
650 		if (elf_ppnt->p_type == PT_INTERP) {
651 			/* This is the path of the program interpreter
652 			 * used for shared libraries; read it in here
653 			 * and validate the file itself further down.
654 			 */
655 			retval = -ENOEXEC;
656 			if (elf_ppnt->p_filesz > PATH_MAX ||
657 			    elf_ppnt->p_filesz < 2)
658 				goto out_free_file;
659 
660 			retval = -ENOMEM;
661 			elf_interpreter = kmalloc(elf_ppnt->p_filesz,
662 						  GFP_KERNEL);
663 			if (!elf_interpreter)
664 				goto out_free_file;
665 
666 			retval = kernel_read(bprm->file, elf_ppnt->p_offset,
667 					     elf_interpreter,
668 					     elf_ppnt->p_filesz);
669 			if (retval != elf_ppnt->p_filesz) {
670 				if (retval >= 0)
671 					retval = -EIO;
672 				goto out_free_interp;
673 			}
674 			/* make sure path is NULL terminated */
675 			retval = -ENOEXEC;
676 			if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
677 				goto out_free_interp;
678 
679 			/*
680 			 * The early SET_PERSONALITY here is so that the lookup
681 			 * for the interpreter happens in the namespace of the
682 			 * to-be-execed image.  SET_PERSONALITY can select an
683 			 * alternate root.
684 			 *
685 			 * However, SET_PERSONALITY is NOT allowed to switch
686 			 * this task into the new image's memory mapping
687 			 * policy - that is, TASK_SIZE must still evaluate to
688 			 * that which is appropriate to the execing application.
689 			 * This is because exit_mmap() needs to have TASK_SIZE
690 			 * evaluate to the size of the old image.
691 			 *
692 			 * So if (say) a 64-bit application is execing a 32-bit
693 			 * application it is the architecture's responsibility
694 			 * to defer changing the value of TASK_SIZE until the
695 			 * switch really is going to happen - do this in
696 			 * flush_thread().	- akpm
697 			 */
698 			SET_PERSONALITY(loc->elf_ex);
699 
700 			interpreter = open_exec(elf_interpreter);
701 			retval = PTR_ERR(interpreter);
702 			if (IS_ERR(interpreter))
703 				goto out_free_interp;
704 
705 			/*
706 			 * If the binary is not readable then enforce
707 			 * mm->dumpable = 0 regardless of the interpreter's
708 			 * permissions.
709 			 */
710 			if (file_permission(interpreter, MAY_READ) < 0)
711 				bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
712 
713 			retval = kernel_read(interpreter, 0, bprm->buf,
714 					     BINPRM_BUF_SIZE);
715 			if (retval != BINPRM_BUF_SIZE) {
716 				if (retval >= 0)
717 					retval = -EIO;
718 				goto out_free_dentry;
719 			}
720 
721 			/* Get the exec headers */
722 			loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
723 			break;
724 		}
725 		elf_ppnt++;
726 	}
727 
728 	elf_ppnt = elf_phdata;
729 	for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
730 		if (elf_ppnt->p_type == PT_GNU_STACK) {
731 			if (elf_ppnt->p_flags & PF_X)
732 				executable_stack = EXSTACK_ENABLE_X;
733 			else
734 				executable_stack = EXSTACK_DISABLE_X;
735 			break;
736 		}
737 
738 	/* Some simple consistency checks for the interpreter */
739 	if (elf_interpreter) {
740 		retval = -ELIBBAD;
741 		/* Not an ELF interpreter */
742 		if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
743 			goto out_free_dentry;
744 		/* Verify the interpreter has a valid arch */
745 		if (!elf_check_arch(&loc->interp_elf_ex))
746 			goto out_free_dentry;
747 	} else {
748 		/* Executables without an interpreter also need a personality  */
749 		SET_PERSONALITY(loc->elf_ex);
750 	}
751 
752 	/* Flush all traces of the currently running executable */
753 	retval = flush_old_exec(bprm);
754 	if (retval)
755 		goto out_free_dentry;
756 
757 	/* OK, This is the point of no return */
758 	current->flags &= ~PF_FORKNOEXEC;
759 	current->mm->def_flags = def_flags;
760 
761 	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
762 	   may depend on the personality.  */
763 	SET_PERSONALITY(loc->elf_ex);
764 	if (elf_read_implies_exec(loc->elf_ex, executable_stack))
765 		current->personality |= READ_IMPLIES_EXEC;
766 
767 	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
768 		current->flags |= PF_RANDOMIZE;
769 	arch_pick_mmap_layout(current->mm);
770 
771 	/* Do this so that we can load the interpreter, if need be.  We will
772 	   change some of these later */
773 	current->mm->free_area_cache = current->mm->mmap_base;
774 	current->mm->cached_hole_size = 0;
775 	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
776 				 executable_stack);
777 	if (retval < 0) {
778 		send_sig(SIGKILL, current, 0);
779 		goto out_free_dentry;
780 	}
781 
782 	current->mm->start_stack = bprm->p;
783 
784 	/* Now we do a little grungy work by mmaping the ELF image into
785 	   the correct location in memory. */
786 	for(i = 0, elf_ppnt = elf_phdata;
787 	    i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
788 		int elf_prot = 0, elf_flags;
789 		unsigned long k, vaddr;
790 
791 		if (elf_ppnt->p_type != PT_LOAD)
792 			continue;
793 
794 		if (unlikely (elf_brk > elf_bss)) {
795 			unsigned long nbyte;
796 
797 			/* There was a PT_LOAD segment with p_memsz > p_filesz
798 			   before this one. Map anonymous pages, if needed,
799 			   and clear the area.  */
800 			retval = set_brk (elf_bss + load_bias,
801 					  elf_brk + load_bias);
802 			if (retval) {
803 				send_sig(SIGKILL, current, 0);
804 				goto out_free_dentry;
805 			}
806 			nbyte = ELF_PAGEOFFSET(elf_bss);
807 			if (nbyte) {
808 				nbyte = ELF_MIN_ALIGN - nbyte;
809 				if (nbyte > elf_brk - elf_bss)
810 					nbyte = elf_brk - elf_bss;
811 				if (clear_user((void __user *)elf_bss +
812 							load_bias, nbyte)) {
813 					/*
814 					 * This bss-zeroing can fail if the ELF
815 					 * file specifies odd protections. So
816 					 * we don't check the return value
817 					 */
818 				}
819 			}
820 		}
821 
822 		if (elf_ppnt->p_flags & PF_R)
823 			elf_prot |= PROT_READ;
824 		if (elf_ppnt->p_flags & PF_W)
825 			elf_prot |= PROT_WRITE;
826 		if (elf_ppnt->p_flags & PF_X)
827 			elf_prot |= PROT_EXEC;
828 
829 		elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
830 
831 		vaddr = elf_ppnt->p_vaddr;
832 		if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
833 			elf_flags |= MAP_FIXED;
834 		} else if (loc->elf_ex.e_type == ET_DYN) {
835 			/* Try and get dynamic programs out of the way of the
836 			 * default mmap base, as well as whatever program they
837 			 * might try to exec.  This is because the brk will
838 			 * follow the loader, and is not movable.  */
839 #ifdef CONFIG_X86
840 			load_bias = 0;
841 #else
842 			load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
843 #endif
844 		}
845 
846 		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
847 				elf_prot, elf_flags, 0);
848 		if (BAD_ADDR(error)) {
849 			send_sig(SIGKILL, current, 0);
850 			retval = IS_ERR((void *)error) ?
851 				PTR_ERR((void*)error) : -EINVAL;
852 			goto out_free_dentry;
853 		}
854 
855 		if (!load_addr_set) {
856 			load_addr_set = 1;
857 			load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
858 			if (loc->elf_ex.e_type == ET_DYN) {
859 				load_bias += error -
860 				             ELF_PAGESTART(load_bias + vaddr);
861 				load_addr += load_bias;
862 				reloc_func_desc = load_bias;
863 			}
864 		}
865 		k = elf_ppnt->p_vaddr;
866 		if (k < start_code)
867 			start_code = k;
868 		if (start_data < k)
869 			start_data = k;
870 
871 		/*
872 		 * Check to see if the section's size will overflow the
873 		 * allowed task size. Note that p_filesz must always be
874 		 * <= p_memsz so it is only necessary to check p_memsz.
875 		 */
876 		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
877 		    elf_ppnt->p_memsz > TASK_SIZE ||
878 		    TASK_SIZE - elf_ppnt->p_memsz < k) {
879 			/* set_brk can never work. Avoid overflows. */
880 			send_sig(SIGKILL, current, 0);
881 			retval = -EINVAL;
882 			goto out_free_dentry;
883 		}
884 
885 		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
886 
887 		if (k > elf_bss)
888 			elf_bss = k;
889 		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
890 			end_code = k;
891 		if (end_data < k)
892 			end_data = k;
893 		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
894 		if (k > elf_brk)
895 			elf_brk = k;
896 	}
897 
898 	loc->elf_ex.e_entry += load_bias;
899 	elf_bss += load_bias;
900 	elf_brk += load_bias;
901 	start_code += load_bias;
902 	end_code += load_bias;
903 	start_data += load_bias;
904 	end_data += load_bias;
905 
906 	/* Calling set_brk effectively mmaps the pages that we need
907 	 * for the bss and break sections.  We must do this before
908 	 * mapping in the interpreter, to make sure it doesn't wind
909 	 * up getting placed where the bss needs to go.
910 	 */
911 	retval = set_brk(elf_bss, elf_brk);
912 	if (retval) {
913 		send_sig(SIGKILL, current, 0);
914 		goto out_free_dentry;
915 	}
916 	if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
917 		send_sig(SIGSEGV, current, 0);
918 		retval = -EFAULT; /* Nobody gets to see this, but.. */
919 		goto out_free_dentry;
920 	}
921 
922 	if (elf_interpreter) {
923 		unsigned long uninitialized_var(interp_map_addr);
924 
925 		elf_entry = load_elf_interp(&loc->interp_elf_ex,
926 					    interpreter,
927 					    &interp_map_addr,
928 					    load_bias);
929 		if (!IS_ERR((void *)elf_entry)) {
930 			/*
931 			 * load_elf_interp() returns relocation
932 			 * adjustment
933 			 */
934 			interp_load_addr = elf_entry;
935 			elf_entry += loc->interp_elf_ex.e_entry;
936 		}
937 		if (BAD_ADDR(elf_entry)) {
938 			force_sig(SIGSEGV, current);
939 			retval = IS_ERR((void *)elf_entry) ?
940 					(int)elf_entry : -EINVAL;
941 			goto out_free_dentry;
942 		}
943 		reloc_func_desc = interp_load_addr;
944 
945 		allow_write_access(interpreter);
946 		fput(interpreter);
947 		kfree(elf_interpreter);
948 	} else {
949 		elf_entry = loc->elf_ex.e_entry;
950 		if (BAD_ADDR(elf_entry)) {
951 			force_sig(SIGSEGV, current);
952 			retval = -EINVAL;
953 			goto out_free_dentry;
954 		}
955 	}
956 
957 	kfree(elf_phdata);
958 
959 	sys_close(elf_exec_fileno);
960 
961 	set_binfmt(&elf_format);
962 
963 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
964 	retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
965 	if (retval < 0) {
966 		send_sig(SIGKILL, current, 0);
967 		goto out;
968 	}
969 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
970 
971 	install_exec_creds(bprm);
972 	current->flags &= ~PF_FORKNOEXEC;
973 	retval = create_elf_tables(bprm, &loc->elf_ex,
974 			  load_addr, interp_load_addr);
975 	if (retval < 0) {
976 		send_sig(SIGKILL, current, 0);
977 		goto out;
978 	}
979 	/* N.B. passed_fileno might not be initialized? */
980 	current->mm->end_code = end_code;
981 	current->mm->start_code = start_code;
982 	current->mm->start_data = start_data;
983 	current->mm->end_data = end_data;
984 	current->mm->start_stack = bprm->p;
985 
986 #ifdef arch_randomize_brk
987 	if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1))
988 		current->mm->brk = current->mm->start_brk =
989 			arch_randomize_brk(current->mm);
990 #endif
991 
992 	if (current->personality & MMAP_PAGE_ZERO) {
993 		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
994 		   and some applications "depend" upon this behavior.
995 		   Since we do not have the power to recompile these, we
996 		   emulate the SVr4 behavior. Sigh. */
997 		down_write(&current->mm->mmap_sem);
998 		error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
999 				MAP_FIXED | MAP_PRIVATE, 0);
1000 		up_write(&current->mm->mmap_sem);
1001 	}
1002 
1003 #ifdef ELF_PLAT_INIT
1004 	/*
1005 	 * The ABI may specify that certain registers be set up in special
1006 	 * ways (on i386 %edx is the address of a DT_FINI function, for
1007 	 * example).  In addition, it may also specify (eg, PowerPC64 ELF)
1008 	 * that the e_entry field is the address of the function descriptor
1009 	 * for the startup routine, rather than the address of the startup
1010 	 * routine itself.  This macro performs whatever initialization to
1011 	 * the regs structure is required as well as any relocations to the
1012 	 * function descriptor entries when executing dynamically linked apps.
1013 	 */
1014 	ELF_PLAT_INIT(regs, reloc_func_desc);
1015 #endif
1016 
1017 	start_thread(regs, elf_entry, bprm->p);
1018 	retval = 0;
1019 out:
1020 	kfree(loc);
1021 out_ret:
1022 	return retval;
1023 
1024 	/* error cleanup */
1025 out_free_dentry:
1026 	allow_write_access(interpreter);
1027 	if (interpreter)
1028 		fput(interpreter);
1029 out_free_interp:
1030 	kfree(elf_interpreter);
1031 out_free_file:
1032 	sys_close(elf_exec_fileno);
1033 out_free_ph:
1034 	kfree(elf_phdata);
1035 	goto out;
1036 }
1037 
1038 /* This is really simpleminded and specialized - we are loading an
1039    a.out library that is given an ELF header. */
1040 static int load_elf_library(struct file *file)
1041 {
1042 	struct elf_phdr *elf_phdata;
1043 	struct elf_phdr *eppnt;
1044 	unsigned long elf_bss, bss, len;
1045 	int retval, error, i, j;
1046 	struct elfhdr elf_ex;
1047 
1048 	error = -ENOEXEC;
1049 	retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1050 	if (retval != sizeof(elf_ex))
1051 		goto out;
1052 
1053 	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1054 		goto out;
1055 
1056 	/* First of all, some simple consistency checks */
1057 	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1058 	    !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
1059 		goto out;
1060 
1061 	/* Now read in all of the header information */
1062 
1063 	j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1064 	/* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1065 
1066 	error = -ENOMEM;
1067 	elf_phdata = kmalloc(j, GFP_KERNEL);
1068 	if (!elf_phdata)
1069 		goto out;
1070 
1071 	eppnt = elf_phdata;
1072 	error = -ENOEXEC;
1073 	retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1074 	if (retval != j)
1075 		goto out_free_ph;
1076 
1077 	for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1078 		if ((eppnt + i)->p_type == PT_LOAD)
1079 			j++;
1080 	if (j != 1)
1081 		goto out_free_ph;
1082 
1083 	while (eppnt->p_type != PT_LOAD)
1084 		eppnt++;
1085 
1086 	/* Now use mmap to map the library into memory. */
1087 	down_write(&current->mm->mmap_sem);
1088 	error = do_mmap(file,
1089 			ELF_PAGESTART(eppnt->p_vaddr),
1090 			(eppnt->p_filesz +
1091 			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
1092 			PROT_READ | PROT_WRITE | PROT_EXEC,
1093 			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1094 			(eppnt->p_offset -
1095 			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
1096 	up_write(&current->mm->mmap_sem);
1097 	if (error != ELF_PAGESTART(eppnt->p_vaddr))
1098 		goto out_free_ph;
1099 
1100 	elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1101 	if (padzero(elf_bss)) {
1102 		error = -EFAULT;
1103 		goto out_free_ph;
1104 	}
1105 
1106 	len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1107 			    ELF_MIN_ALIGN - 1);
1108 	bss = eppnt->p_memsz + eppnt->p_vaddr;
1109 	if (bss > len) {
1110 		down_write(&current->mm->mmap_sem);
1111 		do_brk(len, bss - len);
1112 		up_write(&current->mm->mmap_sem);
1113 	}
1114 	error = 0;
1115 
1116 out_free_ph:
1117 	kfree(elf_phdata);
1118 out:
1119 	return error;
1120 }
1121 
1122 /*
1123  * Note that some platforms still use traditional core dumps and not
1124  * the ELF core dump.  Each platform can select it as appropriate.
1125  */
1126 #if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
1127 
1128 /*
1129  * ELF core dumper
1130  *
1131  * Modelled on fs/exec.c:aout_core_dump()
1132  * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1133  */
1134 /*
1135  * These are the only things you should do on a core-file: use only these
1136  * functions to write out all the necessary info.
1137  */
1138 static int dump_write(struct file *file, const void *addr, int nr)
1139 {
1140 	return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
1141 }
1142 
1143 static int dump_seek(struct file *file, loff_t off)
1144 {
1145 	if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
1146 		if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
1147 			return 0;
1148 	} else {
1149 		char *buf = (char *)get_zeroed_page(GFP_KERNEL);
1150 		if (!buf)
1151 			return 0;
1152 		while (off > 0) {
1153 			unsigned long n = off;
1154 			if (n > PAGE_SIZE)
1155 				n = PAGE_SIZE;
1156 			if (!dump_write(file, buf, n))
1157 				return 0;
1158 			off -= n;
1159 		}
1160 		free_page((unsigned long)buf);
1161 	}
1162 	return 1;
1163 }
1164 
1165 /*
1166  * Decide what to dump of a segment, part, all or none.
1167  */
1168 static unsigned long vma_dump_size(struct vm_area_struct *vma,
1169 				   unsigned long mm_flags)
1170 {
1171 #define FILTER(type)	(mm_flags & (1UL << MMF_DUMP_##type))
1172 
1173 	/* The vma can be set up to tell us the answer directly.  */
1174 	if (vma->vm_flags & VM_ALWAYSDUMP)
1175 		goto whole;
1176 
1177 	/* Hugetlb memory check */
1178 	if (vma->vm_flags & VM_HUGETLB) {
1179 		if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1180 			goto whole;
1181 		if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1182 			goto whole;
1183 	}
1184 
1185 	/* Do not dump I/O mapped devices or special mappings */
1186 	if (vma->vm_flags & (VM_IO | VM_RESERVED))
1187 		return 0;
1188 
1189 	/* By default, dump shared memory if mapped from an anonymous file. */
1190 	if (vma->vm_flags & VM_SHARED) {
1191 		if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ?
1192 		    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1193 			goto whole;
1194 		return 0;
1195 	}
1196 
1197 	/* Dump segments that have been written to.  */
1198 	if (vma->anon_vma && FILTER(ANON_PRIVATE))
1199 		goto whole;
1200 	if (vma->vm_file == NULL)
1201 		return 0;
1202 
1203 	if (FILTER(MAPPED_PRIVATE))
1204 		goto whole;
1205 
1206 	/*
1207 	 * If this looks like the beginning of a DSO or executable mapping,
1208 	 * check for an ELF header.  If we find one, dump the first page to
1209 	 * aid in determining what was mapped here.
1210 	 */
1211 	if (FILTER(ELF_HEADERS) &&
1212 	    vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
1213 		u32 __user *header = (u32 __user *) vma->vm_start;
1214 		u32 word;
1215 		mm_segment_t fs = get_fs();
1216 		/*
1217 		 * Doing it this way gets the constant folded by GCC.
1218 		 */
1219 		union {
1220 			u32 cmp;
1221 			char elfmag[SELFMAG];
1222 		} magic;
1223 		BUILD_BUG_ON(SELFMAG != sizeof word);
1224 		magic.elfmag[EI_MAG0] = ELFMAG0;
1225 		magic.elfmag[EI_MAG1] = ELFMAG1;
1226 		magic.elfmag[EI_MAG2] = ELFMAG2;
1227 		magic.elfmag[EI_MAG3] = ELFMAG3;
1228 		/*
1229 		 * Switch to the user "segment" for get_user(),
1230 		 * then put back what elf_core_dump() had in place.
1231 		 */
1232 		set_fs(USER_DS);
1233 		if (unlikely(get_user(word, header)))
1234 			word = 0;
1235 		set_fs(fs);
1236 		if (word == magic.cmp)
1237 			return PAGE_SIZE;
1238 	}
1239 
1240 #undef	FILTER
1241 
1242 	return 0;
1243 
1244 whole:
1245 	return vma->vm_end - vma->vm_start;
1246 }
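
/*
 * The MMF_DUMP_* bits tested above live in the mm_flags value passed in
 * and correspond to the per-process mask exposed through
 * /proc/<pid>/coredump_filter.
 */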
1247 
1248 /* An ELF note in memory */
1249 struct memelfnote
1250 {
1251 	const char *name;
1252 	int type;
1253 	unsigned int datasz;
1254 	void *data;
1255 };
1256 
1257 static int notesize(struct memelfnote *en)
1258 {
1259 	int sz;
1260 
1261 	sz = sizeof(struct elf_note);
1262 	sz += roundup(strlen(en->name) + 1, 4);
1263 	sz += roundup(en->datasz, 4);
1264 
1265 	return sz;
1266 }
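
/*
 * For example, a "CORE"/NT_PRSTATUS note costs sizeof(struct elf_note)
 * (three 32-bit words), plus the name "CORE" rounded up from 5 to 8
 * bytes, plus sizeof(struct elf_prstatus) rounded up to a multiple of
 * four - the same layout that writenote() emits below.
 */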
1267 
1268 #define DUMP_WRITE(addr, nr, foffset)	\
1269 	do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)
1270 
1271 static int alignfile(struct file *file, loff_t *foffset)
1272 {
1273 	static const char buf[4] = { 0, };
1274 	DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
1275 	return 1;
1276 }
1277 
1278 static int writenote(struct memelfnote *men, struct file *file,
1279 			loff_t *foffset)
1280 {
1281 	struct elf_note en;
1282 	en.n_namesz = strlen(men->name) + 1;
1283 	en.n_descsz = men->datasz;
1284 	en.n_type = men->type;
1285 
1286 	DUMP_WRITE(&en, sizeof(en), foffset);
1287 	DUMP_WRITE(men->name, en.n_namesz, foffset);
1288 	if (!alignfile(file, foffset))
1289 		return 0;
1290 	DUMP_WRITE(men->data, men->datasz, foffset);
1291 	if (!alignfile(file, foffset))
1292 		return 0;
1293 
1294 	return 1;
1295 }
1296 #undef DUMP_WRITE
1297 
1298 #define DUMP_WRITE(addr, nr)	\
1299 	if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
1300 		goto end_coredump;
1301 #define DUMP_SEEK(off)	\
1302 	if (!dump_seek(file, (off))) \
1303 		goto end_coredump;
1304 
1305 static void fill_elf_header(struct elfhdr *elf, int segs,
1306 			    u16 machine, u32 flags, u8 osabi)
1307 {
1308 	memset(elf, 0, sizeof(*elf));
1309 
1310 	memcpy(elf->e_ident, ELFMAG, SELFMAG);
1311 	elf->e_ident[EI_CLASS] = ELF_CLASS;
1312 	elf->e_ident[EI_DATA] = ELF_DATA;
1313 	elf->e_ident[EI_VERSION] = EV_CURRENT;
1314 	elf->e_ident[EI_OSABI] = ELF_OSABI;
1315 
1316 	elf->e_type = ET_CORE;
1317 	elf->e_machine = machine;
1318 	elf->e_version = EV_CURRENT;
1319 	elf->e_phoff = sizeof(struct elfhdr);
1320 	elf->e_flags = flags;
1321 	elf->e_ehsize = sizeof(struct elfhdr);
1322 	elf->e_phentsize = sizeof(struct elf_phdr);
1323 	elf->e_phnum = segs;
1324 
1325 	return;
1326 }
1327 
1328 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1329 {
1330 	phdr->p_type = PT_NOTE;
1331 	phdr->p_offset = offset;
1332 	phdr->p_vaddr = 0;
1333 	phdr->p_paddr = 0;
1334 	phdr->p_filesz = sz;
1335 	phdr->p_memsz = 0;
1336 	phdr->p_flags = 0;
1337 	phdr->p_align = 0;
1338 	return;
1339 }
1340 
1341 static void fill_note(struct memelfnote *note, const char *name, int type,
1342 		unsigned int sz, void *data)
1343 {
1344 	note->name = name;
1345 	note->type = type;
1346 	note->datasz = sz;
1347 	note->data = data;
1348 	return;
1349 }
1350 
1351 /*
1352  * fill up all the fields in prstatus from the given task struct, except
1353  * registers which need to be filled up separately.
1354  */
1355 static void fill_prstatus(struct elf_prstatus *prstatus,
1356 		struct task_struct *p, long signr)
1357 {
1358 	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1359 	prstatus->pr_sigpend = p->pending.signal.sig[0];
1360 	prstatus->pr_sighold = p->blocked.sig[0];
1361 	prstatus->pr_pid = task_pid_vnr(p);
1362 	prstatus->pr_ppid = task_pid_vnr(p->real_parent);
1363 	prstatus->pr_pgrp = task_pgrp_vnr(p);
1364 	prstatus->pr_sid = task_session_vnr(p);
1365 	if (thread_group_leader(p)) {
1366 		struct task_cputime cputime;
1367 
1368 		/*
1369 		 * This is the record for the group leader.  It shows the
1370 		 * group-wide total, not its individual thread total.
1371 		 */
1372 		thread_group_cputime(p, &cputime);
1373 		cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1374 		cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1375 	} else {
1376 		cputime_to_timeval(p->utime, &prstatus->pr_utime);
1377 		cputime_to_timeval(p->stime, &prstatus->pr_stime);
1378 	}
1379 	cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1380 	cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1381 }
1382 
1383 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1384 		       struct mm_struct *mm)
1385 {
1386 	const struct cred *cred;
1387 	unsigned int i, len;
1388 
1389 	/* first copy the parameters from user space */
1390 	memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1391 
1392 	len = mm->arg_end - mm->arg_start;
1393 	if (len >= ELF_PRARGSZ)
1394 		len = ELF_PRARGSZ-1;
1395 	if (copy_from_user(&psinfo->pr_psargs,
1396 		           (const char __user *)mm->arg_start, len))
1397 		return -EFAULT;
1398 	for(i = 0; i < len; i++)
1399 		if (psinfo->pr_psargs[i] == 0)
1400 			psinfo->pr_psargs[i] = ' ';
1401 	psinfo->pr_psargs[len] = 0;
1402 
1403 	psinfo->pr_pid = task_pid_vnr(p);
1404 	psinfo->pr_ppid = task_pid_vnr(p->real_parent);
1405 	psinfo->pr_pgrp = task_pgrp_vnr(p);
1406 	psinfo->pr_sid = task_session_vnr(p);
1407 
1408 	i = p->state ? ffz(~p->state) + 1 : 0;
1409 	psinfo->pr_state = i;
1410 	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1411 	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1412 	psinfo->pr_nice = task_nice(p);
1413 	psinfo->pr_flag = p->flags;
1414 	rcu_read_lock();
1415 	cred = __task_cred(p);
1416 	SET_UID(psinfo->pr_uid, cred->uid);
1417 	SET_GID(psinfo->pr_gid, cred->gid);
1418 	rcu_read_unlock();
1419 	strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1420 
1421 	return 0;
1422 }
1423 
1424 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1425 {
1426 	elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1427 	int i = 0;
1428 	do
1429 		i += 2;
1430 	while (auxv[i - 2] != AT_NULL);
1431 	fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1432 }
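
/*
 * Note that the scan above advances two elf_addr_t slots at a time and
 * stops only after stepping past the AT_NULL key, so the terminating
 * AT_NULL pair is included in the note that gets written out.
 */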
1433 
1434 #ifdef CORE_DUMP_USE_REGSET
1435 #include <linux/regset.h>
1436 
1437 struct elf_thread_core_info {
1438 	struct elf_thread_core_info *next;
1439 	struct task_struct *task;
1440 	struct elf_prstatus prstatus;
1441 	struct memelfnote notes[0];
1442 };
1443 
1444 struct elf_note_info {
1445 	struct elf_thread_core_info *thread;
1446 	struct memelfnote psinfo;
1447 	struct memelfnote auxv;
1448 	size_t size;
1449 	int thread_notes;
1450 };
1451 
1452 /*
1453  * When a regset has a writeback hook, we call it on each thread before
1454  * dumping user memory.  On register window machines, this makes sure the
1455  * user memory backing the register data is up to date before we read it.
1456  */
1457 static void do_thread_regset_writeback(struct task_struct *task,
1458 				       const struct user_regset *regset)
1459 {
1460 	if (regset->writeback)
1461 		regset->writeback(task, regset, 1);
1462 }
1463 
1464 static int fill_thread_core_info(struct elf_thread_core_info *t,
1465 				 const struct user_regset_view *view,
1466 				 long signr, size_t *total)
1467 {
1468 	unsigned int i;
1469 
1470 	/*
1471 	 * NT_PRSTATUS is the one special case, because the regset data
1472 	 * goes into the pr_reg field inside the note contents, rather
1473 	 * than being the whole note contents.  We fill the rest in here.
1474 	 * We assume that regset 0 is NT_PRSTATUS.
1475 	 */
1476 	fill_prstatus(&t->prstatus, t->task, signr);
1477 	(void) view->regsets[0].get(t->task, &view->regsets[0],
1478 				    0, sizeof(t->prstatus.pr_reg),
1479 				    &t->prstatus.pr_reg, NULL);
1480 
1481 	fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1482 		  sizeof(t->prstatus), &t->prstatus);
1483 	*total += notesize(&t->notes[0]);
1484 
1485 	do_thread_regset_writeback(t->task, &view->regsets[0]);
1486 
1487 	/*
1488 	 * Each other regset might generate a note too.  For each regset
1489 	 * that has no core_note_type or is inactive, we leave t->notes[i]
1490 	 * all zero and we'll know to skip writing it later.
1491 	 */
1492 	for (i = 1; i < view->n; ++i) {
1493 		const struct user_regset *regset = &view->regsets[i];
1494 		do_thread_regset_writeback(t->task, regset);
1495 		if (regset->core_note_type &&
1496 		    (!regset->active || regset->active(t->task, regset))) {
1497 			int ret;
1498 			size_t size = regset->n * regset->size;
1499 			void *data = kmalloc(size, GFP_KERNEL);
1500 			if (unlikely(!data))
1501 				return 0;
1502 			ret = regset->get(t->task, regset,
1503 					  0, size, data, NULL);
1504 			if (unlikely(ret))
1505 				kfree(data);
1506 			else {
1507 				if (regset->core_note_type != NT_PRFPREG)
1508 					fill_note(&t->notes[i], "LINUX",
1509 						  regset->core_note_type,
1510 						  size, data);
1511 				else {
1512 					t->prstatus.pr_fpvalid = 1;
1513 					fill_note(&t->notes[i], "CORE",
1514 						  NT_PRFPREG, size, data);
1515 				}
1516 				*total += notesize(&t->notes[i]);
1517 			}
1518 		}
1519 	}
1520 
1521 	return 1;
1522 }
1523 
1524 static int fill_note_info(struct elfhdr *elf, int phdrs,
1525 			  struct elf_note_info *info,
1526 			  long signr, struct pt_regs *regs)
1527 {
1528 	struct task_struct *dump_task = current;
1529 	const struct user_regset_view *view = task_user_regset_view(dump_task);
1530 	struct elf_thread_core_info *t;
1531 	struct elf_prpsinfo *psinfo;
1532 	struct core_thread *ct;
1533 	unsigned int i;
1534 
1535 	info->size = 0;
1536 	info->thread = NULL;
1537 
1538 	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1539 	fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1540 
1541 	if (psinfo == NULL)
1542 		return 0;
1543 
1544 	/*
1545 	 * Figure out how many notes we're going to need for each thread.
1546 	 */
1547 	info->thread_notes = 0;
1548 	for (i = 0; i < view->n; ++i)
1549 		if (view->regsets[i].core_note_type != 0)
1550 			++info->thread_notes;
1551 
1552 	/*
1553 	 * Sanity check.  We rely on regset 0 being NT_PRSTATUS,
1554 	 * since it is our one special case.
1555 	 */
1556 	if (unlikely(info->thread_notes == 0) ||
1557 	    unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1558 		WARN_ON(1);
1559 		return 0;
1560 	}
1561 
1562 	/*
1563 	 * Initialize the ELF file header.
1564 	 */
1565 	fill_elf_header(elf, phdrs,
1566 			view->e_machine, view->e_flags, view->ei_osabi);
1567 
1568 	/*
1569 	 * Allocate a structure for each thread.
1570 	 */
1571 	for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1572 		t = kzalloc(offsetof(struct elf_thread_core_info,
1573 				     notes[info->thread_notes]),
1574 			    GFP_KERNEL);
1575 		if (unlikely(!t))
1576 			return 0;
1577 
1578 		t->task = ct->task;
1579 		if (ct->task == dump_task || !info->thread) {
1580 			t->next = info->thread;
1581 			info->thread = t;
1582 		} else {
1583 			/*
1584 			 * Make sure to keep the original task at
1585 			 * the head of the list.
1586 			 */
1587 			t->next = info->thread->next;
1588 			info->thread->next = t;
1589 		}
1590 	}
1591 
1592 	/*
1593 	 * Now fill in each thread's information.
1594 	 */
1595 	for (t = info->thread; t != NULL; t = t->next)
1596 		if (!fill_thread_core_info(t, view, signr, &info->size))
1597 			return 0;
1598 
1599 	/*
1600 	 * Fill in the two process-wide notes.
1601 	 */
1602 	fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1603 	info->size += notesize(&info->psinfo);
1604 
1605 	fill_auxv_note(&info->auxv, current->mm);
1606 	info->size += notesize(&info->auxv);
1607 
1608 	return 1;
1609 }
1610 
1611 static size_t get_note_info_size(struct elf_note_info *info)
1612 {
1613 	return info->size;
1614 }
1615 
1616 /*
1617  * Write all the notes for each thread.  When writing the first thread, the
1618  * process-wide notes are interleaved after the first thread-specific note.
1619  */
1620 static int write_note_info(struct elf_note_info *info,
1621 			   struct file *file, loff_t *foffset)
1622 {
1623 	bool first = 1;
1624 	struct elf_thread_core_info *t = info->thread;
1625 
1626 	do {
1627 		int i;
1628 
1629 		if (!writenote(&t->notes[0], file, foffset))
1630 			return 0;
1631 
1632 		if (first && !writenote(&info->psinfo, file, foffset))
1633 			return 0;
1634 		if (first && !writenote(&info->auxv, file, foffset))
1635 			return 0;
1636 
1637 		for (i = 1; i < info->thread_notes; ++i)
1638 			if (t->notes[i].data &&
1639 			    !writenote(&t->notes[i], file, foffset))
1640 				return 0;
1641 
1642 		first = 0;
1643 		t = t->next;
1644 	} while (t);
1645 
1646 	return 1;
1647 }
1648 
1649 static void free_note_info(struct elf_note_info *info)
1650 {
1651 	struct elf_thread_core_info *threads = info->thread;
1652 	while (threads) {
1653 		unsigned int i;
1654 		struct elf_thread_core_info *t = threads;
1655 		threads = t->next;
1656 		WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1657 		for (i = 1; i < info->thread_notes; ++i)
1658 			kfree(t->notes[i].data);
1659 		kfree(t);
1660 	}
1661 	kfree(info->psinfo.data);
1662 }
1663 
1664 #else
1665 
1666 /* Here is the structure in which status of each thread is captured. */
1667 struct elf_thread_status
1668 {
1669 	struct list_head list;
1670 	struct elf_prstatus prstatus;	/* NT_PRSTATUS */
1671 	elf_fpregset_t fpu;		/* NT_PRFPREG */
1672 	struct task_struct *thread;
1673 #ifdef ELF_CORE_COPY_XFPREGS
1674 	elf_fpxregset_t xfpu;		/* ELF_CORE_XFPREG_TYPE */
1675 #endif
1676 	struct memelfnote notes[3];
1677 	int num_notes;
1678 };
1679 
1680 /*
1681  * In order to add the specific thread information for the elf file format,
1682  * we need to keep a linked list of every thread's pr_status and then create
1683  * a single section for them in the final core file.
1684  */
1685 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1686 {
1687 	int sz = 0;
1688 	struct task_struct *p = t->thread;
1689 	t->num_notes = 0;
1690 
1691 	fill_prstatus(&t->prstatus, p, signr);
1692 	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1693 
1694 	fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1695 		  &(t->prstatus));
1696 	t->num_notes++;
1697 	sz += notesize(&t->notes[0]);
1698 
1699 	if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1700 								&t->fpu))) {
1701 		fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1702 			  &(t->fpu));
1703 		t->num_notes++;
1704 		sz += notesize(&t->notes[1]);
1705 	}
1706 
1707 #ifdef ELF_CORE_COPY_XFPREGS
1708 	if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1709 		fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1710 			  sizeof(t->xfpu), &t->xfpu);
1711 		t->num_notes++;
1712 		sz += notesize(&t->notes[2]);
1713 	}
1714 #endif
1715 	return sz;
1716 }
1717 
1718 struct elf_note_info {
1719 	struct memelfnote *notes;
1720 	struct elf_prstatus *prstatus;	/* NT_PRSTATUS */
1721 	struct elf_prpsinfo *psinfo;	/* NT_PRPSINFO */
1722 	struct list_head thread_list;
1723 	elf_fpregset_t *fpu;
1724 #ifdef ELF_CORE_COPY_XFPREGS
1725 	elf_fpxregset_t *xfpu;
1726 #endif
1727 	int thread_status_size;
1728 	int numnote;
1729 };
1730 
1731 static int fill_note_info(struct elfhdr *elf, int phdrs,
1732 			  struct elf_note_info *info,
1733 			  long signr, struct pt_regs *regs)
1734 {
1735 #define	NUM_NOTES	6
1736 	struct list_head *t;
1737 
1738 	info->notes = NULL;
1739 	info->prstatus = NULL;
1740 	info->psinfo = NULL;
1741 	info->fpu = NULL;
1742 #ifdef ELF_CORE_COPY_XFPREGS
1743 	info->xfpu = NULL;
1744 #endif
1745 	INIT_LIST_HEAD(&info->thread_list);
1746 
1747 	info->notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote),
1748 			      GFP_KERNEL);
1749 	if (!info->notes)
1750 		return 0;
1751 	info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1752 	if (!info->psinfo)
1753 		return 0;
1754 	info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1755 	if (!info->prstatus)
1756 		return 0;
1757 	info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1758 	if (!info->fpu)
1759 		return 0;
1760 #ifdef ELF_CORE_COPY_XFPREGS
1761 	info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1762 	if (!info->xfpu)
1763 		return 0;
1764 #endif
1765 
1766 	info->thread_status_size = 0;
1767 	if (signr) {
1768 		struct core_thread *ct;
1769 		struct elf_thread_status *ets;
1770 
1771 		for (ct = current->mm->core_state->dumper.next;
1772 						ct; ct = ct->next) {
1773 			ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1774 			if (!ets)
1775 				return 0;
1776 
1777 			ets->thread = ct->task;
1778 			list_add(&ets->list, &info->thread_list);
1779 		}
1780 
1781 		list_for_each(t, &info->thread_list) {
1782 			int sz;
1783 
1784 			ets = list_entry(t, struct elf_thread_status, list);
1785 			sz = elf_dump_thread_status(signr, ets);
1786 			info->thread_status_size += sz;
1787 		}
1788 	}
1789 	/* now collect the dump for the current task */
1790 	memset(info->prstatus, 0, sizeof(*info->prstatus));
1791 	fill_prstatus(info->prstatus, current, signr);
1792 	elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1793 
1794 	/* Set up header */
1795 	fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS, ELF_OSABI);
1796 
1797 	/*
1798 	 * Set up the notes in similar form to SVR4 core dumps made
1799 	 * with info from their /proc.
1800 	 */
1801 
1802 	fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1803 		  sizeof(*info->prstatus), info->prstatus);
1804 	fill_psinfo(info->psinfo, current->group_leader, current->mm);
1805 	fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1806 		  sizeof(*info->psinfo), info->psinfo);
1807 
1808 	info->numnote = 2;
1809 
1810 	fill_auxv_note(&info->notes[info->numnote++], current->mm);
1811 
1812 	/* Try to dump the FPU. */
1813 	info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1814 							       info->fpu);
1815 	if (info->prstatus->pr_fpvalid)
1816 		fill_note(info->notes + info->numnote++,
1817 			  "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1818 #ifdef ELF_CORE_COPY_XFPREGS
1819 	if (elf_core_copy_task_xfpregs(current, info->xfpu))
1820 		fill_note(info->notes + info->numnote++,
1821 			  "LINUX", ELF_CORE_XFPREG_TYPE,
1822 			  sizeof(*info->xfpu), info->xfpu);
1823 #endif
1824 
1825 	return 1;
1826 
1827 #undef NUM_NOTES
1828 }
1829 
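/*
 * Total payload of the PT_NOTE segment: the process-wide notes filled in by
 * fill_note_info() plus the per-thread status notes.  Used to size the
 * notes program header before anything is written out.
 */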
1830 static size_t get_note_info_size(struct elf_note_info *info)
1831 {
1832 	int sz = 0;
1833 	int i;
1834 
1835 	for (i = 0; i < info->numnote; i++)
1836 		sz += notesize(info->notes + i);
1837 
1838 	sz += info->thread_status_size;
1839 
1840 	return sz;
1841 }
1842 
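/*
 * Emit the notes in the order they were sized: the process-wide notes
 * first, then the per-thread status notes.  Returns 0 as soon as a write
 * fails so the dump can be aborted.
 */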
1843 static int write_note_info(struct elf_note_info *info,
1844 			   struct file *file, loff_t *foffset)
1845 {
1846 	int i;
1847 	struct list_head *t;
1848 
1849 	for (i = 0; i < info->numnote; i++)
1850 		if (!writenote(info->notes + i, file, foffset))
1851 			return 0;
1852 
1853 	/* write out the thread status notes section */
1854 	list_for_each(t, &info->thread_list) {
1855 		struct elf_thread_status *tmp =
1856 				list_entry(t, struct elf_thread_status, list);
1857 
1858 		for (i = 0; i < tmp->num_notes; i++)
1859 			if (!writenote(&tmp->notes[i], file, foffset))
1860 				return 0;
1861 	}
1862 
1863 	return 1;
1864 }
1865 
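/*
 * Release everything fill_note_info() allocated, including the per-thread
 * status entries.  All pointers start out NULL, so this is also safe after
 * a partially failed fill_note_info().
 */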
1866 static void free_note_info(struct elf_note_info *info)
1867 {
1868 	while (!list_empty(&info->thread_list)) {
1869 		struct list_head *tmp = info->thread_list.next;
1870 		list_del(tmp);
1871 		kfree(list_entry(tmp, struct elf_thread_status, list));
1872 	}
1873 
1874 	kfree(info->prstatus);
1875 	kfree(info->psinfo);
1876 	kfree(info->notes);
1877 	kfree(info->fpu);
1878 #ifdef ELF_CORE_COPY_XFPREGS
1879 	kfree(info->xfpu);
1880 #endif
1881 }
1882 
1883 #endif
1884 
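/*
 * The two helpers below iterate over the task's vmas but also fold in the
 * gate vma returned by get_gate_vma() (typically the vsyscall page), which
 * is not linked into mm->mmap yet should still appear in the dump.
 */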
1885 static struct vm_area_struct *first_vma(struct task_struct *tsk,
1886 					struct vm_area_struct *gate_vma)
1887 {
1888 	struct vm_area_struct *ret = tsk->mm->mmap;
1889 
1890 	if (ret)
1891 		return ret;
1892 	return gate_vma;
1893 }
1894 /*
1895  * Helper function for iterating across a vma list.  It ensures that the caller
1896  * will visit `gate_vma' prior to terminating the search.
1897  */
1898 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1899 					struct vm_area_struct *gate_vma)
1900 {
1901 	struct vm_area_struct *ret;
1902 
1903 	ret = this_vma->vm_next;
1904 	if (ret)
1905 		return ret;
1906 	if (this_vma == gate_vma)
1907 		return NULL;
1908 	return gate_vma;
1909 }
1910 
1911 /*
1912  * Actual dumper
1913  *
1914  * This is a two-pass process; first we find the offsets of the bits,
1915  * and then they are actually written out.  If we run out of core limit
1916  * we just truncate.
1917  */
1918 static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
1919 {
1920 	int has_dumped = 0;
1921 	mm_segment_t fs;
1922 	int segs;
1923 	size_t size = 0;
1924 	struct vm_area_struct *vma, *gate_vma;
1925 	struct elfhdr *elf = NULL;
1926 	loff_t offset = 0, dataoff, foffset;
1927 	unsigned long mm_flags;
1928 	struct elf_note_info info;
1929 
1930 	/*
1931 	 * We no longer stop all VM operations.
1932 	 *
1933 	 * This is because those processes that could possibly change map_count
1934 	 * or the mmap / vma pages are now blocked in do_exit on current
1935 	 * finishing this core dump.
1936 	 *
1937 	 * Only ptrace can touch these memory addresses, but it doesn't change
1938 	 * the map_count or the pages allocated. So no possibility of crashing
1939 	 * exists while dumping the mm->vm_next areas to the core file.
1940 	 */
1941 
1942 	/* alloc memory for large data structures: too large to be on stack */
1943 	elf = kmalloc(sizeof(*elf), GFP_KERNEL);
1944 	if (!elf)
1945 		goto out;
1946 
1947 	segs = current->mm->map_count;
1948 #ifdef ELF_CORE_EXTRA_PHDRS
1949 	segs += ELF_CORE_EXTRA_PHDRS;
1950 #endif
1951 
1952 	gate_vma = get_gate_vma(current);
1953 	if (gate_vma != NULL)
1954 		segs++;
1955 
1956 	/*
1957 	 * Collect all the non-memory information about the process for the
1958 	 * notes.  This also sets up the file header.
1959 	 */
1960 	if (!fill_note_info(elf, segs + 1, /* including notes section */
1961 			    &info, signr, regs))
1962 		goto cleanup;
1963 
1964 	has_dumped = 1;
1965 	current->flags |= PF_DUMPCORE;
1966 
1967 	fs = get_fs();
1968 	set_fs(KERNEL_DS);
1969 
1970 	DUMP_WRITE(elf, sizeof(*elf));
1971 	offset += sizeof(*elf);				/* Elf header */
1972 	offset += (segs + 1) * sizeof(struct elf_phdr); /* Program headers */
1973 	foffset = offset;
1974 
1975 	/* Write notes phdr entry */
1976 	{
1977 		struct elf_phdr phdr;
1978 		size_t sz = get_note_info_size(&info);
1979 
1980 		sz += elf_coredump_extra_notes_size();
1981 
1982 		fill_elf_note_phdr(&phdr, sz, offset);
1983 		offset += sz;
1984 		DUMP_WRITE(&phdr, sizeof(phdr));
1985 	}
1986 
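	/*
	 * File layout: ELF header, (segs + 1) program headers, the notes,
	 * then the memory segments starting at the next ELF_EXEC_PAGESIZE
	 * boundary, recorded in dataoff.
	 */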
1987 	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
1988 
1989 	/*
1990 	 * We must use the same mm->flags while dumping core to avoid
1991 	 * inconsistency between the program headers and bodies, otherwise an
1992 	 * unusable core file can be generated.
1993 	 */
1994 	mm_flags = current->mm->flags;
1995 
1996 	/*
	 * Write a PT_LOAD program header per vma for the segments dump.
	 * p_filesz comes from vma_dump_size() and may be smaller than
	 * p_memsz when the mm_flags snapshot says the mapping's contents
	 * are to be omitted from the dump.
	 */
1997 	for (vma = first_vma(current, gate_vma); vma != NULL;
1998 			vma = next_vma(vma, gate_vma)) {
1999 		struct elf_phdr phdr;
2000 
2001 		phdr.p_type = PT_LOAD;
2002 		phdr.p_offset = offset;
2003 		phdr.p_vaddr = vma->vm_start;
2004 		phdr.p_paddr = 0;
2005 		phdr.p_filesz = vma_dump_size(vma, mm_flags);
2006 		phdr.p_memsz = vma->vm_end - vma->vm_start;
2007 		offset += phdr.p_filesz;
2008 		phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2009 		if (vma->vm_flags & VM_WRITE)
2010 			phdr.p_flags |= PF_W;
2011 		if (vma->vm_flags & VM_EXEC)
2012 			phdr.p_flags |= PF_X;
2013 		phdr.p_align = ELF_EXEC_PAGESIZE;
2014 
2015 		DUMP_WRITE(&phdr, sizeof(phdr));
2016 	}
2017 
2018 #ifdef ELF_CORE_WRITE_EXTRA_PHDRS
2019 	ELF_CORE_WRITE_EXTRA_PHDRS;
2020 #endif
2021 
2022 	/* write out the notes section */
2023 	if (!write_note_info(&info, file, &foffset))
2024 		goto end_coredump;
2025 
2026 	if (elf_coredump_extra_notes_write(file, &foffset))
2027 		goto end_coredump;
2028 
2029 	/* Align to page */
2030 	DUMP_SEEK(dataoff - foffset);
2031 
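	/*
	 * Second pass: walk the vmas again and write out the page contents.
	 * Pages that cannot be fetched, and the shared zero page, are skipped
	 * with a seek so file offsets keep matching the program headers;
	 * exceeding the core limit aborts the dump.
	 */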
2032 	for (vma = first_vma(current, gate_vma); vma != NULL;
2033 			vma = next_vma(vma, gate_vma)) {
2034 		unsigned long addr;
2035 		unsigned long end;
2036 
2037 		end = vma->vm_start + vma_dump_size(vma, mm_flags);
2038 
2039 		for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2040 			struct page *page;
2041 			struct vm_area_struct *tmp_vma;
2042 
2043 			if (get_user_pages(current, current->mm, addr, 1, 0, 1,
2044 						&page, &tmp_vma) <= 0) {
2045 				DUMP_SEEK(PAGE_SIZE);
2046 			} else {
2047 				if (page == ZERO_PAGE(0)) {
2048 					if (!dump_seek(file, PAGE_SIZE)) {
2049 						page_cache_release(page);
2050 						goto end_coredump;
2051 					}
2052 				} else {
2053 					void *kaddr;
2054 					flush_cache_page(tmp_vma, addr,
2055 							 page_to_pfn(page));
2056 					kaddr = kmap(page);
2057 					if ((size += PAGE_SIZE) > limit ||
2058 					    !dump_write(file, kaddr,
2059 					    PAGE_SIZE)) {
2060 						kunmap(page);
2061 						page_cache_release(page);
2062 						goto end_coredump;
2063 					}
2064 					kunmap(page);
2065 				}
2066 				page_cache_release(page);
2067 			}
2068 		}
2069 	}
2070 
2071 #ifdef ELF_CORE_WRITE_EXTRA_DATA
2072 	ELF_CORE_WRITE_EXTRA_DATA;
2073 #endif
2074 
2075 end_coredump:
2076 	set_fs(fs);
2077 
2078 cleanup:
2079 	free_note_info(&info);
2080 	kfree(elf);
2081 out:
2082 	return has_dumped;
2083 }
2084 
2085 #endif		/* USE_ELF_CORE_DUMP */
2086 
2087 static int __init init_elf_binfmt(void)
2088 {
2089 	return register_binfmt(&elf_format);
2090 }
2091 
2092 static void __exit exit_elf_binfmt(void)
2093 {
2094 	/* Remove the ELF loader. */
2095 	unregister_binfmt(&elf_format);
2096 }
2097 
2098 core_initcall(init_elf_binfmt);
2099 module_exit(exit_elf_binfmt);
2100 MODULE_LICENSE("GPL");
2101