/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines.  Information on the format may be found in the book
 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
 * Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/stat.h>
#include <linux/time.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/a.out.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/fcntl.h>
#include <linux/ptrace.h>
#include <linux/slab.h>
#include <linux/shm.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <asm/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>

static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
static int load_elf_library(struct file *);
static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *, int, int);
extern int dump_fpu(struct pt_regs *, elf_fpregset_t *);

#ifndef elf_addr_t
#define elf_addr_t unsigned long
#endif

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file);
#else
#define elf_core_dump	NULL
#endif

#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN	PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS	0
#endif

#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))

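/*
 * A minimal worked example of the page-rounding macros above, assuming
 * ELF_MIN_ALIGN is 4096 (4 KiB pages); the values are illustrative only:
 *
 *	ELF_PAGESTART(0x1234)  == 0x1000   (round down to page start)
 *	ELF_PAGEOFFSET(0x1234) == 0x234    (offset within the page)
 *	ELF_PAGEALIGN(0x1234)  == 0x2000   (round up to the next page)
 */
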
static struct linux_binfmt elf_format = {
	.module		= THIS_MODULE,
	.load_binary	= load_elf_binary,
	.load_shlib	= load_elf_library,
	.core_dump	= elf_core_dump,
	.min_coredump	= ELF_EXEC_PAGESIZE
};

#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)

static int set_brk(unsigned long start, unsigned long end)
{
	start = ELF_PAGEALIGN(start);
	end = ELF_PAGEALIGN(end);
	if (end > start) {
		unsigned long addr;
		down_write(&current->mm->mmap_sem);
		addr = do_brk(start, end - start);
		up_write(&current->mm->mmap_sem);
		if (BAD_ADDR(addr))
			return addr;
	}
	current->mm->start_brk = current->mm->brk = end;
	return 0;
}
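
/*
 * Sketch of set_brk() with illustrative numbers, assuming 4 KiB pages:
 * set_brk(0x08049f14, 0x0804a3c0) rounds both arguments up, giving
 * start == 0x0804a000 and end == 0x0804b000, so do_brk() maps one
 * anonymous page and mm->start_brk/brk are left at 0x0804b000.
 */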

/* We need to explicitly zero any fractional pages
   after the data section (i.e. bss).  These would
   contain junk from the file that should not
   be in memory.
 */
static int padzero(unsigned long elf_bss)
{
	unsigned long nbyte;

	nbyte = ELF_PAGEOFFSET(elf_bss);
	if (nbyte) {
		nbyte = ELF_MIN_ALIGN - nbyte;
		if (clear_user((void __user *) elf_bss, nbyte))
			return -EFAULT;
	}
	return 0;
}
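
/*
 * padzero() example, assuming 4 KiB pages: with elf_bss == 0x0804a3c0,
 * ELF_PAGEOFFSET() gives 0x3c0, so clear_user() zeroes the remaining
 * 0xc40 bytes of that page, i.e. 0x0804a3c0 up to 0x0804b000.
 */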

/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
	elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
	old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
	(((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ sp -= len; sp; })
#endif
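
/*
 * Example of the downward-growing (default) case, with made-up numbers:
 * with an unsigned long stack pointer p == 0xbffffe40, STACK_ALLOC(p, 5)
 * leaves p == 0xbffffe3b and yields that new value; STACK_ROUND then
 * aligns the final position down to a 16-byte boundary (0xbffffe30).
 */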

static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
		int interp_aout, unsigned long load_addr,
		unsigned long interp_load_addr)
{
	unsigned long p = bprm->p;
	int argc = bprm->argc;
	int envc = bprm->envc;
	elf_addr_t __user *argv;
	elf_addr_t __user *envp;
	elf_addr_t __user *sp;
	elf_addr_t __user *u_platform;
	const char *k_platform = ELF_PLATFORM;
	int items;
	elf_addr_t *elf_info;
	int ei_index = 0;
	struct task_struct *tsk = current;

	/*
	 * If this architecture has a platform capability string, copy it
	 * to userspace.  In some cases (Sparc), this info is impossible
	 * for userspace to get any other way, in others (i386) it is
	 * merely difficult.
	 */
	u_platform = NULL;
	if (k_platform) {
		size_t len = strlen(k_platform) + 1;

		/*
		 * In some cases (e.g. Hyper-Threading), we want to avoid L1
		 * evictions by the processes running on the same package. One
		 * thing we can do is to shuffle the initial stack for them.
		 */

		p = arch_align_stack(p);

		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
		if (__copy_to_user(u_platform, k_platform, len))
			return -EFAULT;
	}

	/* Create the ELF interpreter info */
	elf_info = (elf_addr_t *)current->mm->saved_auxv;
#define NEW_AUX_ENT(id, val) \
	do { \
		elf_info[ei_index++] = id; \
		elf_info[ei_index++] = val; \
	} while (0)

#ifdef ARCH_DLINFO
	/*
	 * ARCH_DLINFO must come first so PPC can do its special alignment of
	 * AUXV.
	 */
	ARCH_DLINFO;
#endif
	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
	NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
	NEW_AUX_ENT(AT_BASE, interp_load_addr);
	NEW_AUX_ENT(AT_FLAGS, 0);
	NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
	NEW_AUX_ENT(AT_UID, tsk->uid);
	NEW_AUX_ENT(AT_EUID, tsk->euid);
	NEW_AUX_ENT(AT_GID, tsk->gid);
	NEW_AUX_ENT(AT_EGID, tsk->egid);
	NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
	if (k_platform) {
		NEW_AUX_ENT(AT_PLATFORM,
			    (elf_addr_t)(unsigned long)u_platform);
	}
	if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
		NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
	}
#undef NEW_AUX_ENT
	/* AT_NULL is zero; clear the rest too */
	memset(&elf_info[ei_index], 0,
	       sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);

	/* And advance past the AT_NULL entry.  */
	ei_index += 2;

	sp = STACK_ADD(p, ei_index);

	items = (argc + 1) + (envc + 1);
	if (interp_aout) {
		items += 3; /* a.out interpreters require argv & envp too */
	} else {
		items += 1; /* ELF interpreters only put argc on the stack */
	}
	bprm->p = STACK_ROUND(sp, items);

	/* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
	bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
	sp = (elf_addr_t __user *)bprm->p;
#endif

	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
	if (__put_user(argc, sp++))
		return -EFAULT;
	if (interp_aout) {
		argv = sp + 2;
		envp = argv + argc + 1;
		__put_user((elf_addr_t)(unsigned long)argv, sp++);
		__put_user((elf_addr_t)(unsigned long)envp, sp++);
	} else {
		argv = sp;
		envp = argv + argc + 1;
	}

	/* Populate argv and envp */
	p = current->mm->arg_end = current->mm->arg_start;
	while (argc-- > 0) {
		size_t len;
		__put_user((elf_addr_t)p, argv++);
		len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES);
		if (!len || len > PAGE_SIZE*MAX_ARG_PAGES)
			return 0;
		p += len;
	}
	if (__put_user(0, argv))
		return -EFAULT;
	current->mm->arg_end = current->mm->env_start = p;
	while (envc-- > 0) {
		size_t len;
		__put_user((elf_addr_t)p, envp++);
		len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES);
		if (!len || len > PAGE_SIZE*MAX_ARG_PAGES)
			return 0;
		p += len;
	}
	if (__put_user(0, envp))
		return -EFAULT;
	current->mm->env_end = p;

	/* Put the elf_info on the stack in the right place.  */
	sp = (elf_addr_t __user *)envp + 1;
	if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
		return -EFAULT;
	return 0;
}
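
/*
 * For the common ELF case, create_elf_tables() leaves the new stack laid
 * out roughly as below (lowest address first, on a downward-growing
 * stack); this is a descriptive sketch, not code from this file:
 *
 *	argc
 *	argv[0] ... argv[argc-1], NULL
 *	envp[0] ... envp[n-1], NULL
 *	auxv pairs (a_type, a_val), terminated by AT_NULL
 *	... argument and environment strings higher up ...
 */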

#ifndef elf_map

static unsigned long elf_map(struct file *filep, unsigned long addr,
		struct elf_phdr *eppnt, int prot, int type)
{
	unsigned long map_addr;
	unsigned long pageoffset = ELF_PAGEOFFSET(eppnt->p_vaddr);

	down_write(&current->mm->mmap_sem);
	/* mmap() will return -EINVAL if given a zero size, but a
	 * segment with zero filesize is perfectly valid */
	if (eppnt->p_filesz + pageoffset)
		map_addr = do_mmap(filep, ELF_PAGESTART(addr),
				   eppnt->p_filesz + pageoffset, prot, type,
				   eppnt->p_offset - pageoffset);
	else
		map_addr = ELF_PAGESTART(addr);
	up_write(&current->mm->mmap_sem);
	return map_addr;
}

#endif /* !elf_map */
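
/*
 * Worked example for elf_map(), with made-up numbers and 4 KiB pages:
 * for a segment with p_vaddr == 0x0804a020, p_offset == 0x1020 and
 * p_filesz == 0x300, pageoffset is 0x20, so the call becomes
 * do_mmap(file, 0x0804a000, 0x320, prot, type, 0x1000) - both the
 * address and the file offset are pulled back to a page boundary.
 */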

/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header */

static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
		struct file *interpreter, unsigned long *interp_load_addr)
{
	struct elf_phdr *elf_phdata;
	struct elf_phdr *eppnt;
	unsigned long load_addr = 0;
	int load_addr_set = 0;
	unsigned long last_bss = 0, elf_bss = 0;
	unsigned long error = ~0UL;
	int retval, i, size;

	/* First of all, some simple consistency checks */
	if (interp_elf_ex->e_type != ET_EXEC &&
	    interp_elf_ex->e_type != ET_DYN)
		goto out;
	if (!elf_check_arch(interp_elf_ex))
		goto out;
	if (!interpreter->f_op || !interpreter->f_op->mmap)
		goto out;

	/*
	 * If the size of this structure has changed, then punt, since
	 * we will be doing the wrong thing.
	 */
	if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
		goto out;
	if (interp_elf_ex->e_phnum < 1 ||
	    interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
		goto out;

	/* Now read in all of the header information */
	size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
	if (size > ELF_MIN_ALIGN)
		goto out;
	elf_phdata = kmalloc(size, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
			     (char *)elf_phdata, size);
	error = -EIO;
	if (retval != size) {
		if (retval < 0)
			error = retval;
		goto out_close;
	}

	eppnt = elf_phdata;
	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
		if (eppnt->p_type == PT_LOAD) {
			int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
			int elf_prot = 0;
			unsigned long vaddr = 0;
			unsigned long k, map_addr;

			if (eppnt->p_flags & PF_R)
				elf_prot = PROT_READ;
			if (eppnt->p_flags & PF_W)
				elf_prot |= PROT_WRITE;
			if (eppnt->p_flags & PF_X)
				elf_prot |= PROT_EXEC;
			vaddr = eppnt->p_vaddr;
			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
				elf_type |= MAP_FIXED;

			map_addr = elf_map(interpreter, load_addr + vaddr,
					   eppnt, elf_prot, elf_type);
			error = map_addr;
			if (BAD_ADDR(map_addr))
				goto out_close;

			if (!load_addr_set &&
			    interp_elf_ex->e_type == ET_DYN) {
				load_addr = map_addr - ELF_PAGESTART(vaddr);
				load_addr_set = 1;
			}

			/*
			 * Check to see if the section's size will overflow the
			 * allowed task size. Note that p_filesz must always be
			 * <= p_memsz so it's only necessary to check p_memsz.
			 */
			k = load_addr + eppnt->p_vaddr;
			if (BAD_ADDR(k) ||
			    eppnt->p_filesz > eppnt->p_memsz ||
			    eppnt->p_memsz > TASK_SIZE ||
			    TASK_SIZE - eppnt->p_memsz < k) {
				error = -ENOMEM;
				goto out_close;
			}

			/*
			 * Find the end of the file mapping for this phdr, and
			 * keep track of the largest address we see for this.
			 */
			k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
			if (k > elf_bss)
				elf_bss = k;

			/*
			 * Do the same thing for the memory mapping - between
			 * elf_bss and last_bss is the bss section.
			 */
			k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
			if (k > last_bss)
				last_bss = k;
		}
	}

	/*
	 * Now fill out the bss section.  First pad the last page up
	 * to the page boundary, and then perform a mmap to make sure
	 * that there are zero-mapped pages up to and including the
	 * last bss page.
	 */
	if (padzero(elf_bss)) {
		error = -EFAULT;
		goto out_close;
	}

	/* What we have mapped so far */
	elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);

	/* Map the last of the bss segment */
	if (last_bss > elf_bss) {
		down_write(&current->mm->mmap_sem);
		error = do_brk(elf_bss, last_bss - elf_bss);
		up_write(&current->mm->mmap_sem);
		if (BAD_ADDR(error))
			goto out_close;
	}

	*interp_load_addr = load_addr;
	error = ((unsigned long)interp_elf_ex->e_entry) + load_addr;

out_close:
	kfree(elf_phdata);
out:
	return error;
}
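
/*
 * Illustrative load_elf_interp() outcome (addresses are made up): for an
 * ET_DYN interpreter whose first PT_LOAD has p_vaddr == 0, if the kernel
 * happens to place it at 0xb7f00000 then load_addr == 0xb7f00000, and an
 * e_entry of 0x9e0 makes the function return 0xb7f009e0.
 */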

static unsigned long load_aout_interp(struct exec *interp_ex,
		struct file *interpreter)
{
	unsigned long text_data, elf_entry = ~0UL;
	char __user *addr;
	loff_t offset;

	current->mm->end_code = interp_ex->a_text;
	text_data = interp_ex->a_text + interp_ex->a_data;
	current->mm->end_data = text_data;
	current->mm->brk = interp_ex->a_bss + text_data;

	switch (N_MAGIC(*interp_ex)) {
	case OMAGIC:
		offset = 32;
		addr = (char __user *)0;
		break;
	case ZMAGIC:
	case QMAGIC:
		offset = N_TXTOFF(*interp_ex);
		addr = (char __user *)N_TXTADDR(*interp_ex);
		break;
	default:
		goto out;
	}

	down_write(&current->mm->mmap_sem);
	do_brk(0, text_data);
	up_write(&current->mm->mmap_sem);
	if (!interpreter->f_op || !interpreter->f_op->read)
		goto out;
	if (interpreter->f_op->read(interpreter, addr, text_data, &offset) < 0)
		goto out;
	flush_icache_range((unsigned long)addr,
	                   (unsigned long)addr + text_data);

	down_write(&current->mm->mmap_sem);
	do_brk(ELF_PAGESTART(text_data + ELF_MIN_ALIGN - 1),
		interp_ex->a_bss);
	up_write(&current->mm->mmap_sem);
	elf_entry = interp_ex->a_entry;

out:
	return elf_entry;
}

/*
 * These are the functions used to load ELF style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
 */

#define INTERPRETER_NONE 0
#define INTERPRETER_AOUT 1
#define INTERPRETER_ELF 2

#ifndef STACK_RND_MASK
#define STACK_RND_MASK 0x7ff		/* with 4K pages 8MB of VA */
#endif

static unsigned long randomize_stack_top(unsigned long stack_top)
{
	unsigned int random_variable = 0;

	if (current->flags & PF_RANDOMIZE) {
		random_variable = get_random_int() & STACK_RND_MASK;
		random_variable <<= PAGE_SHIFT;
	}
#ifdef CONFIG_STACK_GROWSUP
	return PAGE_ALIGN(stack_top) + random_variable;
#else
	return PAGE_ALIGN(stack_top) - random_variable;
#endif
}
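
/*
 * Example with the default STACK_RND_MASK of 0x7ff and PAGE_SHIFT == 12
 * (4 KiB pages): the random offset spans 0 .. 0x7ff000 bytes (just under
 * 8 MiB); if get_random_int() & 0x7ff yields 0x123 and stack_top is
 * 0xc0000000, the stack top becomes 0xc0000000 - 0x123000 == 0xbfedd000.
 */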

static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
{
	struct file *interpreter = NULL; /* to shut gcc up */
	unsigned long load_addr = 0, load_bias = 0;
	int load_addr_set = 0;
	char *elf_interpreter = NULL;
	unsigned int interpreter_type = INTERPRETER_NONE;
	unsigned char ibcs2_interpreter = 0;
	unsigned long error;
	struct elf_phdr *elf_ppnt, *elf_phdata;
	unsigned long elf_bss, elf_brk;
	int elf_exec_fileno;
	int retval, i;
	unsigned int size;
	unsigned long elf_entry, interp_load_addr = 0;
	unsigned long start_code, end_code, start_data, end_data;
	unsigned long reloc_func_desc = 0;
	char passed_fileno[6];
	struct files_struct *files;
	int have_pt_gnu_stack, executable_stack = EXSTACK_DEFAULT;
	unsigned long def_flags = 0;
	struct {
		struct elfhdr elf_ex;
		struct elfhdr interp_elf_ex;
		struct exec interp_ex;
	} *loc;

	loc = kmalloc(sizeof(*loc), GFP_KERNEL);
	if (!loc) {
		retval = -ENOMEM;
		goto out_ret;
	}

	/* Get the exec-header */
	loc->elf_ex = *((struct elfhdr *)bprm->buf);

	retval = -ENOEXEC;
	/* First of all, some simple consistency checks */
	if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
		goto out;

	if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
		goto out;
	if (!elf_check_arch(&loc->elf_ex))
		goto out;
	if (!bprm->file->f_op || !bprm->file->f_op->mmap)
		goto out;

	/* Now read in all of the header information */
	if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
		goto out;
	if (loc->elf_ex.e_phnum < 1 ||
	    loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
		goto out;
	size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
	retval = -ENOMEM;
	elf_phdata = kmalloc(size, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
			     (char *)elf_phdata, size);
	if (retval != size) {
		if (retval >= 0)
			retval = -EIO;
		goto out_free_ph;
	}

	files = current->files;	/* Refcounted so ok */
	retval = unshare_files();
	if (retval < 0)
		goto out_free_ph;
	if (files == current->files) {
		put_files_struct(files);
		files = NULL;
	}

	/* exec will make our files private anyway, but for the a.out
	   loader stuff we need to do it earlier */
	retval = get_unused_fd();
	if (retval < 0)
		goto out_free_fh;
	get_file(bprm->file);
	fd_install(elf_exec_fileno = retval, bprm->file);

	elf_ppnt = elf_phdata;
	elf_bss = 0;
	elf_brk = 0;

	start_code = ~0UL;
	end_code = 0;
	start_data = 0;
	end_data = 0;

	for (i = 0; i < loc->elf_ex.e_phnum; i++) {
		if (elf_ppnt->p_type == PT_INTERP) {
			/* This is the program interpreter used for
			 * shared libraries - for now assume that this
			 * is an a.out format binary
			 */
			retval = -ENOEXEC;
			if (elf_ppnt->p_filesz > PATH_MAX ||
			    elf_ppnt->p_filesz < 2)
				goto out_free_file;

			retval = -ENOMEM;
			elf_interpreter = kmalloc(elf_ppnt->p_filesz,
						  GFP_KERNEL);
			if (!elf_interpreter)
				goto out_free_file;

			retval = kernel_read(bprm->file, elf_ppnt->p_offset,
					     elf_interpreter,
					     elf_ppnt->p_filesz);
			if (retval != elf_ppnt->p_filesz) {
				if (retval >= 0)
					retval = -EIO;
				goto out_free_interp;
			}
			/* make sure path is NULL terminated */
			retval = -ENOEXEC;
			if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
				goto out_free_interp;

			/* If the program interpreter is one of these two,
			 * then assume an iBCS2 image. Otherwise assume
			 * a native linux image.
			 */
			if (strcmp(elf_interpreter, "/usr/lib/libc.so.1") == 0 ||
			    strcmp(elf_interpreter, "/usr/lib/ld.so.1") == 0)
				ibcs2_interpreter = 1;

			/*
			 * The early SET_PERSONALITY here is so that the lookup
			 * for the interpreter happens in the namespace of the
			 * to-be-execed image.  SET_PERSONALITY can select an
			 * alternate root.
			 *
			 * However, SET_PERSONALITY is NOT allowed to switch
			 * this task into the new image's memory mapping
			 * policy - that is, TASK_SIZE must still evaluate to
			 * that which is appropriate to the execing application.
			 * This is because exit_mmap() needs to have TASK_SIZE
			 * evaluate to the size of the old image.
			 *
			 * So if (say) a 64-bit application is execing a 32-bit
			 * application it is the architecture's responsibility
			 * to defer changing the value of TASK_SIZE until the
			 * switch really is going to happen - do this in
			 * flush_thread().	- akpm
			 */
			SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);

			interpreter = open_exec(elf_interpreter);
			retval = PTR_ERR(interpreter);
			if (IS_ERR(interpreter))
				goto out_free_interp;
			retval = kernel_read(interpreter, 0, bprm->buf,
					     BINPRM_BUF_SIZE);
			if (retval != BINPRM_BUF_SIZE) {
				if (retval >= 0)
					retval = -EIO;
				goto out_free_dentry;
			}

			/* Get the exec headers */
			loc->interp_ex = *((struct exec *)bprm->buf);
			loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
			break;
		}
		elf_ppnt++;
	}

	elf_ppnt = elf_phdata;
	for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
		if (elf_ppnt->p_type == PT_GNU_STACK) {
			if (elf_ppnt->p_flags & PF_X)
				executable_stack = EXSTACK_ENABLE_X;
			else
				executable_stack = EXSTACK_DISABLE_X;
			break;
		}
	have_pt_gnu_stack = (i < loc->elf_ex.e_phnum);

	/* Some simple consistency checks for the interpreter */
	if (elf_interpreter) {
		interpreter_type = INTERPRETER_ELF | INTERPRETER_AOUT;

		/* Now figure out which format our binary is */
		if ((N_MAGIC(loc->interp_ex) != OMAGIC) &&
		    (N_MAGIC(loc->interp_ex) != ZMAGIC) &&
		    (N_MAGIC(loc->interp_ex) != QMAGIC))
			interpreter_type = INTERPRETER_ELF;

		if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
			interpreter_type &= ~INTERPRETER_ELF;

		retval = -ELIBBAD;
		if (!interpreter_type)
			goto out_free_dentry;

		/* Make sure only one type was selected */
		if ((interpreter_type & INTERPRETER_ELF) &&
		     interpreter_type != INTERPRETER_ELF) {
			// FIXME - ratelimit this before re-enabling
			// printk(KERN_WARNING "ELF: Ambiguous type, using ELF\n");
			interpreter_type = INTERPRETER_ELF;
		}
		/* Verify the interpreter has a valid arch */
		if ((interpreter_type == INTERPRETER_ELF) &&
		    !elf_check_arch(&loc->interp_elf_ex))
			goto out_free_dentry;
	} else {
		/* Executables without an interpreter also need a personality */
		SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
	}

	/* OK, we are done with that, now set up the arg stuff,
	   and then start this sucker up */
	if ((!bprm->sh_bang) && (interpreter_type == INTERPRETER_AOUT)) {
		char *passed_p = passed_fileno;
		sprintf(passed_fileno, "%d", elf_exec_fileno);

		if (elf_interpreter) {
			retval = copy_strings_kernel(1, &passed_p, bprm);
			if (retval)
				goto out_free_dentry;
			bprm->argc++;
		}
	}

	/* Flush all traces of the currently running executable */
	retval = flush_old_exec(bprm);
	if (retval)
		goto out_free_dentry;

	/* Discard our unneeded old files struct */
	if (files) {
		put_files_struct(files);
		files = NULL;
	}

	/* OK, This is the point of no return */
	current->mm->start_data = 0;
	current->mm->end_data = 0;
	current->mm->end_code = 0;
	current->mm->mmap = NULL;
	current->flags &= ~PF_FORKNOEXEC;
	current->mm->def_flags = def_flags;

	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
	   may depend on the personality.  */
	SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
	if (elf_read_implies_exec(loc->elf_ex, executable_stack))
		current->personality |= READ_IMPLIES_EXEC;

	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
		current->flags |= PF_RANDOMIZE;
	arch_pick_mmap_layout(current->mm);

	/* Do this so that we can load the interpreter, if need be.  We will
	   change some of these later */
	current->mm->free_area_cache = current->mm->mmap_base;
	current->mm->cached_hole_size = 0;
	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
				 executable_stack);
	if (retval < 0) {
		send_sig(SIGKILL, current, 0);
		goto out_free_dentry;
	}

	current->mm->start_stack = bprm->p;

	/* Now we do a little grungy work by mmaping the ELF image into
	   the correct location in memory.  At this point, we assume that
	   the image should be loaded at fixed address, not at a variable
	   address. */
	for (i = 0, elf_ppnt = elf_phdata;
	     i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
		int elf_prot = 0, elf_flags;
		unsigned long k, vaddr;

		if (elf_ppnt->p_type != PT_LOAD)
			continue;

		if (unlikely(elf_brk > elf_bss)) {
			unsigned long nbyte;

			/* There was a PT_LOAD segment with p_memsz > p_filesz
			   before this one. Map anonymous pages, if needed,
			   and clear the area.  */
			retval = set_brk(elf_bss + load_bias,
					 elf_brk + load_bias);
			if (retval) {
				send_sig(SIGKILL, current, 0);
				goto out_free_dentry;
			}
			nbyte = ELF_PAGEOFFSET(elf_bss);
			if (nbyte) {
				nbyte = ELF_MIN_ALIGN - nbyte;
				if (nbyte > elf_brk - elf_bss)
					nbyte = elf_brk - elf_bss;
				if (clear_user((void __user *)elf_bss +
							load_bias, nbyte)) {
					/*
					 * This bss-zeroing can fail if the ELF
					 * file specifies odd protections. So
					 * we don't check the return value
					 */
				}
			}
		}

		if (elf_ppnt->p_flags & PF_R)
			elf_prot |= PROT_READ;
		if (elf_ppnt->p_flags & PF_W)
			elf_prot |= PROT_WRITE;
		if (elf_ppnt->p_flags & PF_X)
			elf_prot |= PROT_EXEC;

		elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;

		vaddr = elf_ppnt->p_vaddr;
		if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
			elf_flags |= MAP_FIXED;
		} else if (loc->elf_ex.e_type == ET_DYN) {
			/* Try and get dynamic programs out of the way of the
			 * default mmap base, as well as whatever program they
			 * might try to exec.  This is because the brk will
			 * follow the loader, and is not movable.  */
			load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
		}

		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
				elf_prot, elf_flags);
		if (BAD_ADDR(error)) {
			send_sig(SIGKILL, current, 0);
			goto out_free_dentry;
		}

		if (!load_addr_set) {
			load_addr_set = 1;
			load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
			if (loc->elf_ex.e_type == ET_DYN) {
				load_bias += error -
				             ELF_PAGESTART(load_bias + vaddr);
				load_addr += load_bias;
				reloc_func_desc = load_bias;
			}
		}
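
		/*
		 * Illustrative ET_DYN math (addresses made up, e.g. i386
		 * with ELF_ET_DYN_BASE == 0x80000000): for a first PT_LOAD
		 * with p_vaddr == p_offset == 0, load_bias starts as
		 * 0x80000000; if elf_map() places the segment exactly
		 * there, error - ELF_PAGESTART(load_bias + vaddr) is 0,
		 * so load_bias stays 0x80000000 and load_addr becomes
		 * 0x80000000 as well.
		 */
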
		k = elf_ppnt->p_vaddr;
		if (k < start_code)
			start_code = k;
		if (start_data < k)
			start_data = k;

		/*
		 * Check to see if the section's size will overflow the
		 * allowed task size. Note that p_filesz must always be
		 * <= p_memsz so it is only necessary to check p_memsz.
		 */
		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
		    elf_ppnt->p_memsz > TASK_SIZE ||
		    TASK_SIZE - elf_ppnt->p_memsz < k) {
			/* set_brk can never work. Avoid overflows. */
			send_sig(SIGKILL, current, 0);
			goto out_free_dentry;
		}

		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

		if (k > elf_bss)
			elf_bss = k;
		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
			end_code = k;
		if (end_data < k)
			end_data = k;
		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
		if (k > elf_brk)
			elf_brk = k;
	}

	loc->elf_ex.e_entry += load_bias;
	elf_bss += load_bias;
	elf_brk += load_bias;
	start_code += load_bias;
	end_code += load_bias;
	start_data += load_bias;
	end_data += load_bias;

	/* Calling set_brk effectively mmaps the pages that we need
	 * for the bss and break sections.  We must do this before
	 * mapping in the interpreter, to make sure it doesn't wind
	 * up getting placed where the bss needs to go.
	 */
	retval = set_brk(elf_bss, elf_brk);
	if (retval) {
		send_sig(SIGKILL, current, 0);
		goto out_free_dentry;
	}
	if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
		send_sig(SIGSEGV, current, 0);
		retval = -EFAULT; /* Nobody gets to see this, but.. */
		goto out_free_dentry;
	}

	if (elf_interpreter) {
		if (interpreter_type == INTERPRETER_AOUT)
			elf_entry = load_aout_interp(&loc->interp_ex,
						     interpreter);
		else
			elf_entry = load_elf_interp(&loc->interp_elf_ex,
						    interpreter,
						    &interp_load_addr);
		if (BAD_ADDR(elf_entry)) {
			force_sig(SIGSEGV, current);
			retval = IS_ERR((void *)elf_entry) ?
					(int)elf_entry : -EINVAL;
			goto out_free_dentry;
		}
		reloc_func_desc = interp_load_addr;

		allow_write_access(interpreter);
		fput(interpreter);
		kfree(elf_interpreter);
	} else {
		elf_entry = loc->elf_ex.e_entry;
		if (BAD_ADDR(elf_entry)) {
			force_sig(SIGSEGV, current);
			retval = -EINVAL;
			goto out_free_dentry;
		}
	}

	kfree(elf_phdata);

	if (interpreter_type != INTERPRETER_AOUT)
		sys_close(elf_exec_fileno);

	set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
	retval = arch_setup_additional_pages(bprm, executable_stack);
	if (retval < 0) {
		send_sig(SIGKILL, current, 0);
		goto out;
	}
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

	compute_creds(bprm);
	current->flags &= ~PF_FORKNOEXEC;
	create_elf_tables(bprm, &loc->elf_ex,
			  (interpreter_type == INTERPRETER_AOUT),
			  load_addr, interp_load_addr);
	/* N.B. passed_fileno might not be initialized? */
	if (interpreter_type == INTERPRETER_AOUT)
		current->mm->arg_start += strlen(passed_fileno) + 1;
	current->mm->end_code = end_code;
	current->mm->start_code = start_code;
	current->mm->start_data = start_data;
	current->mm->end_data = end_data;
	current->mm->start_stack = bprm->p;

	if (current->personality & MMAP_PAGE_ZERO) {
		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
		   and some applications "depend" upon this behavior.
		   Since we do not have the power to recompile these, we
		   emulate the SVr4 behavior. Sigh. */
		down_write(&current->mm->mmap_sem);
		error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
				MAP_FIXED | MAP_PRIVATE, 0);
		up_write(&current->mm->mmap_sem);
	}

#ifdef ELF_PLAT_INIT
	/*
	 * The ABI may specify that certain registers be set up in special
	 * ways (on i386 %edx is the address of a DT_FINI function, for
	 * example).  In addition, it may also specify (e.g. PowerPC64 ELF)
	 * that the e_entry field is the address of the function descriptor
	 * for the startup routine, rather than the address of the startup
	 * routine itself.  This macro performs whatever initialization to
	 * the regs structure is required as well as any relocations to the
	 * function descriptor entries when executing dynamically linked apps.
	 */
	ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

	start_thread(regs, elf_entry, bprm->p);
	if (unlikely(current->ptrace & PT_PTRACED)) {
		if (current->ptrace & PT_TRACE_EXEC)
			ptrace_notify((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
		else
			send_sig(SIGTRAP, current, 0);
	}
	retval = 0;
out:
	kfree(loc);
out_ret:
	return retval;

	/* error cleanup */
out_free_dentry:
	allow_write_access(interpreter);
	if (interpreter)
		fput(interpreter);
out_free_interp:
	kfree(elf_interpreter);
out_free_file:
	sys_close(elf_exec_fileno);
out_free_fh:
	if (files) {
		put_files_struct(current->files);
		current->files = files;
	}
out_free_ph:
	kfree(elf_phdata);
	goto out;
}

/* This is really simpleminded and specialized - we are loading an
   a.out library that is given an ELF header. */
static int load_elf_library(struct file *file)
{
	struct elf_phdr *elf_phdata;
	struct elf_phdr *eppnt;
	unsigned long elf_bss, bss, len;
	int retval, error, i, j;
	struct elfhdr elf_ex;

	error = -ENOEXEC;
	retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
	if (retval != sizeof(elf_ex))
		goto out;

	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
		goto out;

	/* First of all, some simple consistency checks */
	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
	    !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
		goto out;

	/* Now read in all of the header information */

	j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
	/* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

	error = -ENOMEM;
	elf_phdata = kmalloc(j, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	eppnt = elf_phdata;
	error = -ENOEXEC;
	retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
	if (retval != j)
		goto out_free_ph;

	for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
		if ((eppnt + i)->p_type == PT_LOAD)
			j++;
	if (j != 1)
		goto out_free_ph;

	while (eppnt->p_type != PT_LOAD)
		eppnt++;

	/* Now use mmap to map the library into memory. */
	down_write(&current->mm->mmap_sem);
	error = do_mmap(file,
			ELF_PAGESTART(eppnt->p_vaddr),
			(eppnt->p_filesz +
			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
			PROT_READ | PROT_WRITE | PROT_EXEC,
			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
			(eppnt->p_offset -
			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
	up_write(&current->mm->mmap_sem);
	if (error != ELF_PAGESTART(eppnt->p_vaddr))
		goto out_free_ph;

	elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
	if (padzero(elf_bss)) {
		error = -EFAULT;
		goto out_free_ph;
	}

	len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
			    ELF_MIN_ALIGN - 1);
	bss = eppnt->p_memsz + eppnt->p_vaddr;
	if (bss > len) {
		down_write(&current->mm->mmap_sem);
		do_brk(len, bss - len);
		up_write(&current->mm->mmap_sem);
	}
	error = 0;

out_free_ph:
	kfree(elf_phdata);
out:
	return error;
}
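
/*
 * Worked bss math for load_elf_library(), with made-up numbers and 4 KiB
 * pages: p_vaddr == 0x00300000, p_filesz == 0x2500, p_memsz == 0x4000.
 * Then elf_bss == 0x00302500 and padzero() clears up to 0x00303000;
 * len == ELF_PAGESTART(0x003034ff) == 0x00303000 and bss == 0x00304000,
 * so do_brk(0x00303000, 0x1000) maps the final anonymous bss page.
 */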

/*
 * Note that some platforms still use traditional core dumps and not
 * the ELF core dump.  Each platform can select it as appropriate.
 */
#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)

/*
 * ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */
/*
 * These are the only things you should do on a core-file: use only these
 * functions to write out all the necessary info.
 */
static int dump_write(struct file *file, const void *addr, int nr)
{
	return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
}

static int dump_seek(struct file *file, loff_t off)
{
	if (file->f_op->llseek) {
		if (file->f_op->llseek(file, off, 0) != off)
			return 0;
	} else
		file->f_pos = off;
	return 1;
}

/*
 * Decide whether a segment is worth dumping; default is yes to be
 * sure (missing info is worse than too much; etc).
 * Personally I'd include everything, and use the coredump limit...
 *
 * I think we should skip something. But I am not sure how. H.J.
 */
static int maydump(struct vm_area_struct *vma)
{
	/* Do not dump I/O mapped devices or special mappings */
	if (vma->vm_flags & (VM_IO | VM_RESERVED))
		return 0;

	/* Dump shared memory only if mapped from an anonymous file. */
	if (vma->vm_flags & VM_SHARED)
		return vma->vm_file->f_dentry->d_inode->i_nlink == 0;

	/* If it hasn't been written to, don't write it out */
	if (!vma->anon_vma)
		return 0;

	return 1;
}

#define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y))

/* An ELF note in memory */
struct memelfnote
{
	const char *name;
	int type;
	unsigned int datasz;
	void *data;
};

static int notesize(struct memelfnote *en)
{
	int sz;

	sz = sizeof(struct elf_note);
	sz += roundup(strlen(en->name) + 1, 4);
	sz += roundup(en->datasz, 4);

	return sz;
}
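
/*
 * Example: on 32-bit x86 a "CORE"/NT_PRSTATUS note with a 144-byte
 * struct elf_prstatus payload has notesize() == 12 (note header)
 * + roundup(5, 4) == 8 (name "CORE" plus NUL, padded) + 144 == 164.
 */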

#define DUMP_WRITE(addr, nr)	\
	do { if (!dump_write(file, (addr), (nr))) return 0; } while(0)
#define DUMP_SEEK(off)	\
	do { if (!dump_seek(file, (off))) return 0; } while(0)

static int writenote(struct memelfnote *men, struct file *file)
{
	struct elf_note en;

	en.n_namesz = strlen(men->name) + 1;
	en.n_descsz = men->datasz;
	en.n_type = men->type;

	DUMP_WRITE(&en, sizeof(en));
	DUMP_WRITE(men->name, en.n_namesz);
	/* XXX - cast from long long to long to avoid need for libgcc.a */
	DUMP_SEEK(roundup((unsigned long)file->f_pos, 4));	/* XXX */
	DUMP_WRITE(men->data, men->datasz);
	DUMP_SEEK(roundup((unsigned long)file->f_pos, 4));	/* XXX */

	return 1;
}
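
/*
 * On-disk layout produced by writenote(), following the standard ELF
 * note format: the elf_note header (n_namesz, n_descsz, n_type), then
 * the name bytes including the trailing NUL, padded to a 4-byte
 * boundary, then the descriptor bytes, again padded to 4 bytes.
 */
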
#undef DUMP_WRITE
#undef DUMP_SEEK

#define DUMP_WRITE(addr, nr)	\
	if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
		goto end_coredump;
#define DUMP_SEEK(off)	\
	if (!dump_seek(file, (off))) \
		goto end_coredump;

static void fill_elf_header(struct elfhdr *elf, int segs)
{
	memcpy(elf->e_ident, ELFMAG, SELFMAG);
	elf->e_ident[EI_CLASS] = ELF_CLASS;
	elf->e_ident[EI_DATA] = ELF_DATA;
	elf->e_ident[EI_VERSION] = EV_CURRENT;
	elf->e_ident[EI_OSABI] = ELF_OSABI;
	memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);

	elf->e_type = ET_CORE;
	elf->e_machine = ELF_ARCH;
	elf->e_version = EV_CURRENT;
	elf->e_entry = 0;
	elf->e_phoff = sizeof(struct elfhdr);
	elf->e_shoff = 0;
	elf->e_flags = ELF_CORE_EFLAGS;
	elf->e_ehsize = sizeof(struct elfhdr);
	elf->e_phentsize = sizeof(struct elf_phdr);
	elf->e_phnum = segs;
	elf->e_shentsize = 0;
	elf->e_shnum = 0;
	elf->e_shstrndx = 0;
	return;
}

static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, off_t offset)
{
	phdr->p_type = PT_NOTE;
	phdr->p_offset = offset;
	phdr->p_vaddr = 0;
	phdr->p_paddr = 0;
	phdr->p_filesz = sz;
	phdr->p_memsz = 0;
	phdr->p_flags = 0;
	phdr->p_align = 0;
	return;
}

static void fill_note(struct memelfnote *note, const char *name, int type,
		unsigned int sz, void *data)
{
	note->name = name;
	note->type = type;
	note->datasz = sz;
	note->data = data;
	return;
}

/*
 * fill up all the fields in prstatus from the given task struct, except
 * registers which need to be filled up separately.
 */
static void fill_prstatus(struct elf_prstatus *prstatus,
		struct task_struct *p, long signr)
{
	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
	prstatus->pr_sigpend = p->pending.signal.sig[0];
	prstatus->pr_sighold = p->blocked.sig[0];
	prstatus->pr_pid = p->pid;
	prstatus->pr_ppid = p->parent->pid;
	prstatus->pr_pgrp = process_group(p);
	prstatus->pr_sid = p->signal->session;
	if (thread_group_leader(p)) {
		/*
		 * This is the record for the group leader.  Add in the
		 * cumulative times of previous dead threads.  This total
		 * won't include the time of each live thread whose state
		 * is included in the core dump.  The final total reported
		 * to our parent process when it calls wait4 will include
		 * those sums as well as the little bit more time it takes
		 * this and each other thread to finish dying after the
		 * core dump synchronization phase.
		 */
		cputime_to_timeval(cputime_add(p->utime, p->signal->utime),
				   &prstatus->pr_utime);
		cputime_to_timeval(cputime_add(p->stime, p->signal->stime),
				   &prstatus->pr_stime);
	} else {
		cputime_to_timeval(p->utime, &prstatus->pr_utime);
		cputime_to_timeval(p->stime, &prstatus->pr_stime);
	}
	cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
	cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
}

static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
		       struct mm_struct *mm)
{
	unsigned int i, len;

	/* first copy the parameters from user space */
	memset(psinfo, 0, sizeof(struct elf_prpsinfo));

	len = mm->arg_end - mm->arg_start;
	if (len >= ELF_PRARGSZ)
		len = ELF_PRARGSZ - 1;
	if (copy_from_user(&psinfo->pr_psargs,
		           (const char __user *)mm->arg_start, len))
		return -EFAULT;
	for (i = 0; i < len; i++)
		if (psinfo->pr_psargs[i] == 0)
			psinfo->pr_psargs[i] = ' ';
	psinfo->pr_psargs[len] = 0;

	psinfo->pr_pid = p->pid;
	psinfo->pr_ppid = p->parent->pid;
	psinfo->pr_pgrp = process_group(p);
	psinfo->pr_sid = p->signal->session;

	i = p->state ? ffz(~p->state) + 1 : 0;
	psinfo->pr_state = i;
	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
	psinfo->pr_nice = task_nice(p);
	psinfo->pr_flag = p->flags;
	SET_UID(psinfo->pr_uid, p->uid);
	SET_GID(psinfo->pr_gid, p->gid);
	strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));

	return 0;
}

/* Here is the structure in which status of each thread is captured. */
struct elf_thread_status
{
	struct list_head list;
	struct elf_prstatus prstatus;	/* NT_PRSTATUS */
	elf_fpregset_t fpu;		/* NT_PRFPREG */
	struct task_struct *thread;
#ifdef ELF_CORE_COPY_XFPREGS
	elf_fpxregset_t xfpu;		/* NT_PRXFPREG */
#endif
	struct memelfnote notes[3];
	int num_notes;
};

/*
 * In order to add the specific thread information for the elf file format,
 * we need to keep a linked list of every thread's pr_status and then create
 * a single section for them in the final core file.
 */
static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
{
	int sz = 0;
	struct task_struct *p = t->thread;
	t->num_notes = 0;

	fill_prstatus(&t->prstatus, p, signr);
	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);

	fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
		  &(t->prstatus));
	t->num_notes++;
	sz += notesize(&t->notes[0]);

	if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
								&t->fpu))) {
		fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
			  &(t->fpu));
		t->num_notes++;
		sz += notesize(&t->notes[1]);
	}

#ifdef ELF_CORE_COPY_XFPREGS
	if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
		fill_note(&t->notes[2], "LINUX", NT_PRXFPREG, sizeof(t->xfpu),
			  &t->xfpu);
		t->num_notes++;
		sz += notesize(&t->notes[2]);
	}
#endif
	return sz;
}

/*
 * Actual dumper
 *
 * This is a two-pass process; first we find the offsets of the bits,
 * and then they are actually written out.  If we run out of core limit
 * we just truncate.
 */
static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
{
#define	NUM_NOTES	6
	int has_dumped = 0;
	mm_segment_t fs;
	int segs;
	size_t size = 0;
	int i;
	struct vm_area_struct *vma;
	struct elfhdr *elf = NULL;
	off_t offset = 0, dataoff;
	unsigned long limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
	int numnote;
	struct memelfnote *notes = NULL;
	struct elf_prstatus *prstatus = NULL;	/* NT_PRSTATUS */
	struct elf_prpsinfo *psinfo = NULL;	/* NT_PRPSINFO */
	struct task_struct *g, *p;
	LIST_HEAD(thread_list);
	struct list_head *t;
	elf_fpregset_t *fpu = NULL;
#ifdef ELF_CORE_COPY_XFPREGS
	elf_fpxregset_t *xfpu = NULL;
#endif
	int thread_status_size = 0;
	elf_addr_t *auxv;

	/*
	 * We no longer stop all VM operations.
	 *
	 * This is because those processes that could possibly change map_count
	 * or the mmap / vma pages are now blocked in do_exit on current
	 * finishing this core dump.
	 *
	 * Only ptrace can touch these memory addresses, but it doesn't change
	 * the map_count or the pages allocated. So no possibility of crashing
	 * exists while dumping the mm->vm_next areas to the core file.
	 */

	/* alloc memory for large data structures: too large to be on stack */
	elf = kmalloc(sizeof(*elf), GFP_KERNEL);
	if (!elf)
		goto cleanup;
	prstatus = kmalloc(sizeof(*prstatus), GFP_KERNEL);
	if (!prstatus)
		goto cleanup;
	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
	if (!psinfo)
		goto cleanup;
	notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote), GFP_KERNEL);
	if (!notes)
		goto cleanup;
	fpu = kmalloc(sizeof(*fpu), GFP_KERNEL);
	if (!fpu)
		goto cleanup;
#ifdef ELF_CORE_COPY_XFPREGS
	xfpu = kmalloc(sizeof(*xfpu), GFP_KERNEL);
	if (!xfpu)
		goto cleanup;
#endif

	if (signr) {
		struct elf_thread_status *tmp;
		read_lock(&tasklist_lock);
		do_each_thread(g,p)
			if (current->mm == p->mm && current != p) {
				tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC);
				if (!tmp) {
					read_unlock(&tasklist_lock);
					goto cleanup;
				}
				INIT_LIST_HEAD(&tmp->list);
				tmp->thread = p;
				list_add(&tmp->list, &thread_list);
			}
		while_each_thread(g,p);
		read_unlock(&tasklist_lock);
		list_for_each(t, &thread_list) {
			struct elf_thread_status *tmp;
			int sz;

			tmp = list_entry(t, struct elf_thread_status, list);
			sz = elf_dump_thread_status(signr, tmp);
			thread_status_size += sz;
		}
	}
	/* now collect the dump for the current */
	memset(prstatus, 0, sizeof(*prstatus));
	fill_prstatus(prstatus, current, signr);
	elf_core_copy_regs(&prstatus->pr_reg, regs);

	segs = current->mm->map_count;
#ifdef ELF_CORE_EXTRA_PHDRS
	segs += ELF_CORE_EXTRA_PHDRS;
#endif

	/* Set up header */
	fill_elf_header(elf, segs + 1);	/* including notes section */

	has_dumped = 1;
	current->flags |= PF_DUMPCORE;

	/*
	 * Set up the notes in similar form to SVR4 core dumps made
	 * with info from their /proc.
	 */

	fill_note(notes + 0, "CORE", NT_PRSTATUS, sizeof(*prstatus), prstatus);
	fill_psinfo(psinfo, current->group_leader, current->mm);
	fill_note(notes + 1, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);

	numnote = 2;

	auxv = (elf_addr_t *)current->mm->saved_auxv;

	i = 0;
	do
		i += 2;
	while (auxv[i - 2] != AT_NULL);
	fill_note(&notes[numnote++], "CORE", NT_AUXV,
		  i * sizeof(elf_addr_t), auxv);

	/* Try to dump the FPU. */
	if ((prstatus->pr_fpvalid =
	     elf_core_copy_task_fpregs(current, regs, fpu)))
		fill_note(notes + numnote++,
			  "CORE", NT_PRFPREG, sizeof(*fpu), fpu);
#ifdef ELF_CORE_COPY_XFPREGS
	if (elf_core_copy_task_xfpregs(current, xfpu))
		fill_note(notes + numnote++,
			  "LINUX", NT_PRXFPREG, sizeof(*xfpu), xfpu);
#endif

	fs = get_fs();
	set_fs(KERNEL_DS);

	DUMP_WRITE(elf, sizeof(*elf));
	offset += sizeof(*elf);				/* Elf header */
	offset += (segs+1) * sizeof(struct elf_phdr);	/* Program headers */

	/* Write notes phdr entry */
	{
		struct elf_phdr phdr;
		int sz = 0;

		for (i = 0; i < numnote; i++)
			sz += notesize(notes + i);

		sz += thread_status_size;

		fill_elf_note_phdr(&phdr, sz, offset);
		offset += sz;
		DUMP_WRITE(&phdr, sizeof(phdr));
	}

	/* Page-align dumped data */
	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);

	/* Write program headers for segments dump */
	for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
		struct elf_phdr phdr;
		size_t sz;

		sz = vma->vm_end - vma->vm_start;

		phdr.p_type = PT_LOAD;
		phdr.p_offset = offset;
		phdr.p_vaddr = vma->vm_start;
		phdr.p_paddr = 0;
		phdr.p_filesz = maydump(vma) ? sz : 0;
		phdr.p_memsz = sz;
		offset += phdr.p_filesz;
		phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
		if (vma->vm_flags & VM_WRITE)
			phdr.p_flags |= PF_W;
		if (vma->vm_flags & VM_EXEC)
			phdr.p_flags |= PF_X;
		phdr.p_align = ELF_EXEC_PAGESIZE;

		DUMP_WRITE(&phdr, sizeof(phdr));
	}

#ifdef ELF_CORE_WRITE_EXTRA_PHDRS
	ELF_CORE_WRITE_EXTRA_PHDRS;
#endif

	/* write out the notes section */
	for (i = 0; i < numnote; i++)
		if (!writenote(notes + i, file))
			goto end_coredump;

	/* write out the thread status notes section */
	list_for_each(t, &thread_list) {
		struct elf_thread_status *tmp =
				list_entry(t, struct elf_thread_status, list);

		for (i = 0; i < tmp->num_notes; i++)
			if (!writenote(&tmp->notes[i], file))
				goto end_coredump;
	}

	DUMP_SEEK(dataoff);

	for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
		unsigned long addr;

		if (!maydump(vma))
			continue;

		for (addr = vma->vm_start;
		     addr < vma->vm_end;
		     addr += PAGE_SIZE) {
			struct page *page;
			struct vm_area_struct *vma;

			if (get_user_pages(current, current->mm, addr, 1, 0, 1,
						&page, &vma) <= 0) {
				DUMP_SEEK(file->f_pos + PAGE_SIZE);
			} else {
				if (page == ZERO_PAGE(addr)) {
					DUMP_SEEK(file->f_pos + PAGE_SIZE);
				} else {
					void *kaddr;
					flush_cache_page(vma, addr,
							 page_to_pfn(page));
					kaddr = kmap(page);
					if ((size += PAGE_SIZE) > limit ||
					    !dump_write(file, kaddr,
					    PAGE_SIZE)) {
						kunmap(page);
						page_cache_release(page);
						goto end_coredump;
					}
					kunmap(page);
				}
				page_cache_release(page);
			}
		}
	}

#ifdef ELF_CORE_WRITE_EXTRA_DATA
	ELF_CORE_WRITE_EXTRA_DATA;
#endif

	if ((off_t)file->f_pos != offset) {
		/* Sanity check */
		printk(KERN_WARNING
		       "elf_core_dump: file->f_pos (%ld) != offset (%ld)\n",
		       (off_t)file->f_pos, offset);
	}

end_coredump:
	set_fs(fs);

cleanup:
	while (!list_empty(&thread_list)) {
		struct list_head *tmp = thread_list.next;
		list_del(tmp);
		kfree(list_entry(tmp, struct elf_thread_status, list));
	}

	kfree(elf);
	kfree(prstatus);
	kfree(psinfo);
	kfree(notes);
	kfree(fpu);
#ifdef ELF_CORE_COPY_XFPREGS
	kfree(xfpu);
#endif
	return has_dumped;
#undef NUM_NOTES
}

#endif		/* USE_ELF_CORE_DUMP */

static int __init init_elf_binfmt(void)
{
	return register_binfmt(&elf_format);
}

static void __exit exit_elf_binfmt(void)
{
	/* Remove the ELF loader. */
	unregister_binfmt(&elf_format);
}

core_initcall(init_elf_binfmt);
module_exit(exit_elf_binfmt);
MODULE_LICENSE("GPL");