/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines.  Information on the format may be found in the book
 * "UNIX SYSTEM V RELEASE 4 Programmer's Guide: ANSI C and Programming
 * Support Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/stat.h>
#include <linux/time.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/a.out.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/fcntl.h>
#include <linux/ptrace.h>
#include <linux/slab.h>
#include <linux/shm.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <asm/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>

static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
static int load_elf_library(struct file *);
static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *, int, int);

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file);
#else
#define elf_core_dump	NULL
#endif

#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN	PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS	0
#endif

#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
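
/*
 * Worked example of the rounding macros above (illustrative only,
 * assuming ELF_MIN_ALIGN == 0x1000):
 *
 *	ELF_PAGESTART(0x08048123)  == 0x08048000	round down
 *	ELF_PAGEOFFSET(0x08048123) == 0x00000123	offset within page
 *	ELF_PAGEALIGN(0x08048123)  == 0x08049000	round up
 */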

static struct linux_binfmt elf_format = {
		.module		= THIS_MODULE,
		.load_binary	= load_elf_binary,
		.load_shlib	= load_elf_library,
		.core_dump	= elf_core_dump,
		.min_coredump	= ELF_EXEC_PAGESIZE
};

#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)

static int set_brk(unsigned long start, unsigned long end)
{
	start = ELF_PAGEALIGN(start);
	end = ELF_PAGEALIGN(end);
	if (end > start) {
		unsigned long addr;
		down_write(&current->mm->mmap_sem);
		addr = do_brk(start, end - start);
		up_write(&current->mm->mmap_sem);
		if (BAD_ADDR(addr))
			return addr;
	}
	current->mm->start_brk = current->mm->brk = end;
	return 0;
}

/* We need to explicitly zero any fractional pages
   after the data section (i.e. bss).  Otherwise they
   would contain junk from the file that should not
   be in memory.
 */
static int padzero(unsigned long elf_bss)
{
	unsigned long nbyte;

	nbyte = ELF_PAGEOFFSET(elf_bss);
	if (nbyte) {
		nbyte = ELF_MIN_ALIGN - nbyte;
		if (clear_user((void __user *) elf_bss, nbyte))
			return -EFAULT;
	}
	return 0;
}
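
/*
 * Worked example (illustrative, assuming ELF_MIN_ALIGN == 0x1000): for
 * elf_bss == 0x0804a123, ELF_PAGEOFFSET(elf_bss) == 0x123, so padzero()
 * clears the 0x1000 - 0x123 == 0xedd bytes from 0x0804a123 up to the
 * page boundary at 0x0804b000.
 */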

/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
	elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
	old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
	(((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
#endif
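
/*
 * Illustrative example for the grows-down case (32-bit elf_addr_t,
 * hypothetical addresses):
 *
 *	sp = 0xbffffe34
 *	STACK_ALLOC(sp, 5)	sp -= 5 bytes		-> 0xbffffe2f
 *	STACK_ADD(sp, 4)	4 slots lower		-> 0xbffffe1f
 *	STACK_ROUND(sp, 4)	16-byte aligned		-> 0xbffffe10
 */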

static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
		int interp_aout, unsigned long load_addr,
		unsigned long interp_load_addr)
{
	unsigned long p = bprm->p;
	int argc = bprm->argc;
	int envc = bprm->envc;
	elf_addr_t __user *argv;
	elf_addr_t __user *envp;
	elf_addr_t __user *sp;
	elf_addr_t __user *u_platform;
	const char *k_platform = ELF_PLATFORM;
	int items;
	elf_addr_t *elf_info;
	int ei_index = 0;
	struct task_struct *tsk = current;

	/*
	 * If this architecture has a platform capability string, copy it
	 * to userspace.  In some cases (Sparc), this info is impossible
	 * for userspace to get any other way, in others (i386) it is
	 * merely difficult.
	 */
	u_platform = NULL;
	if (k_platform) {
		size_t len = strlen(k_platform) + 1;

		/*
		 * In some cases (e.g. Hyper-Threading), we want to avoid L1
		 * evictions by the processes running on the same package. One
		 * thing we can do is to shuffle the initial stack for them.
		 */

		p = arch_align_stack(p);

		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
		if (__copy_to_user(u_platform, k_platform, len))
			return -EFAULT;
	}

	/* Create the ELF interpreter info */
	elf_info = (elf_addr_t *)current->mm->saved_auxv;
#define NEW_AUX_ENT(id, val) \
	do { \
		elf_info[ei_index++] = id; \
		elf_info[ei_index++] = val; \
	} while (0)

#ifdef ARCH_DLINFO
	/*
	 * ARCH_DLINFO must come first so PPC can do its special alignment of
	 * AUXV.
	 */
	ARCH_DLINFO;
#endif
	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
	NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
	NEW_AUX_ENT(AT_BASE, interp_load_addr);
	NEW_AUX_ENT(AT_FLAGS, 0);
	NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
	NEW_AUX_ENT(AT_UID, tsk->uid);
	NEW_AUX_ENT(AT_EUID, tsk->euid);
	NEW_AUX_ENT(AT_GID, tsk->gid);
	NEW_AUX_ENT(AT_EGID, tsk->egid);
	NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
	if (k_platform) {
		NEW_AUX_ENT(AT_PLATFORM,
			    (elf_addr_t)(unsigned long)u_platform);
	}
	if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
		NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
	}
#undef NEW_AUX_ENT
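
	/*
	 * elf_info now holds flat (a_type, a_un.a_val) pairs, e.g.
	 * (schematically; the actual values are architecture dependent):
	 *
	 *	{ AT_HWCAP, hwcap, AT_PAGESZ, 0x1000, ..., AT_NULL, 0 }
	 *
	 * The AT_NULL terminator is supplied by the zero-fill below.
	 */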
	/* AT_NULL is zero; clear the rest too */
	memset(&elf_info[ei_index], 0,
	       sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);

	/* And advance past the AT_NULL entry.  */
	ei_index += 2;

	sp = STACK_ADD(p, ei_index);

	items = (argc + 1) + (envc + 1);
	if (interp_aout) {
		items += 3; /* a.out interpreters require argv & envp too */
	} else {
		items += 1; /* ELF interpreters only put argc on the stack */
	}
	bprm->p = STACK_ROUND(sp, items);

	/* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
	bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
	sp = (elf_addr_t __user *)bprm->p;
#endif
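
	/*
	 * Sketch of the initial stack laid out below (lowest address at
	 * the top; an a.out interpreter additionally gets pointers to
	 * argv and envp right after argc):
	 *
	 *	sp ->	argc
	 *		argv[0] ... argv[argc - 1], NULL
	 *		envp[0] ... envp[envc - 1], NULL
	 *		auxv pairs, ending with AT_NULL
	 *		...argument and environment strings (near bprm->p)
	 */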

	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
	if (__put_user(argc, sp++))
		return -EFAULT;
	if (interp_aout) {
		argv = sp + 2;
		envp = argv + argc + 1;
		if (__put_user((elf_addr_t)(unsigned long)argv, sp++) ||
		    __put_user((elf_addr_t)(unsigned long)envp, sp++))
			return -EFAULT;
	} else {
		argv = sp;
		envp = argv + argc + 1;
	}

	/* Populate argv and envp */
	p = current->mm->arg_end = current->mm->arg_start;
	while (argc-- > 0) {
		size_t len;
		if (__put_user((elf_addr_t)p, argv++))
			return -EFAULT;
		len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES);
		if (!len || len > PAGE_SIZE*MAX_ARG_PAGES)
			return 0;
		p += len;
	}
	if (__put_user(0, argv))
		return -EFAULT;
	current->mm->arg_end = current->mm->env_start = p;
	while (envc-- > 0) {
		size_t len;
		if (__put_user((elf_addr_t)p, envp++))
			return -EFAULT;
		len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES);
		if (!len || len > PAGE_SIZE*MAX_ARG_PAGES)
			return 0;
		p += len;
	}
	if (__put_user(0, envp))
		return -EFAULT;
	current->mm->env_end = p;

	/* Put the elf_info on the stack in the right place.  */
	sp = (elf_addr_t __user *)envp + 1;
	if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
		return -EFAULT;
	return 0;
}

#ifndef elf_map

static unsigned long elf_map(struct file *filep, unsigned long addr,
		struct elf_phdr *eppnt, int prot, int type)
{
	unsigned long map_addr;
	unsigned long pageoffset = ELF_PAGEOFFSET(eppnt->p_vaddr);

	down_write(&current->mm->mmap_sem);
	/* mmap() will return -EINVAL if given a zero size, but a
	 * segment with zero file size is perfectly valid */
	if (eppnt->p_filesz + pageoffset)
		map_addr = do_mmap(filep, ELF_PAGESTART(addr),
				   eppnt->p_filesz + pageoffset, prot, type,
				   eppnt->p_offset - pageoffset);
	else
		map_addr = ELF_PAGESTART(addr);
	up_write(&current->mm->mmap_sem);
	return map_addr;
}

#endif /* !elf_map */
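
/*
 * Example of elf_map()'s offset arithmetic (illustrative values): for a
 * phdr with p_vaddr == 0x08048454 and p_offset == 0x454, pageoffset is
 * 0x454, so the segment is mapped at ELF_PAGESTART(addr) == 0x08048000
 * with file offset 0 and length p_filesz + 0x454, preserving the
 * congruence of file offset and virtual address modulo the page size.
 */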

/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header. */

static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
		struct file *interpreter, unsigned long *interp_load_addr)
{
	struct elf_phdr *elf_phdata;
	struct elf_phdr *eppnt;
	unsigned long load_addr = 0;
	int load_addr_set = 0;
	unsigned long last_bss = 0, elf_bss = 0;
	unsigned long error = ~0UL;
	int retval, i, size;

	/* First of all, some simple consistency checks */
	if (interp_elf_ex->e_type != ET_EXEC &&
	    interp_elf_ex->e_type != ET_DYN)
		goto out;
	if (!elf_check_arch(interp_elf_ex))
		goto out;
	if (!interpreter->f_op || !interpreter->f_op->mmap)
		goto out;

	/*
	 * If the size of this structure has changed, then punt, since
	 * we will be doing the wrong thing.
	 */
	if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
		goto out;
	if (interp_elf_ex->e_phnum < 1 ||
		interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
		goto out;

	/* Now read in all of the header information */
	size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
	if (size > ELF_MIN_ALIGN)
		goto out;
	elf_phdata = kmalloc(size, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
			     (char *)elf_phdata, size);
	error = -EIO;
	if (retval != size) {
		if (retval < 0)
			error = retval;
		goto out_close;
	}

	eppnt = elf_phdata;
	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
		if (eppnt->p_type == PT_LOAD) {
			int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
			int elf_prot = 0;
			unsigned long vaddr = 0;
			unsigned long k, map_addr;

			if (eppnt->p_flags & PF_R)
				elf_prot = PROT_READ;
			if (eppnt->p_flags & PF_W)
				elf_prot |= PROT_WRITE;
			if (eppnt->p_flags & PF_X)
				elf_prot |= PROT_EXEC;
			vaddr = eppnt->p_vaddr;
			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
				elf_type |= MAP_FIXED;

			map_addr = elf_map(interpreter, load_addr + vaddr,
					   eppnt, elf_prot, elf_type);
			error = map_addr;
			if (BAD_ADDR(map_addr))
				goto out_close;

			if (!load_addr_set &&
			    interp_elf_ex->e_type == ET_DYN) {
				load_addr = map_addr - ELF_PAGESTART(vaddr);
				load_addr_set = 1;
			}

			/*
			 * Check to see if the section's size will overflow the
			 * allowed task size. Note that p_filesz must always be
			 * <= p_memsz so it's only necessary to check p_memsz.
			 */
			k = load_addr + eppnt->p_vaddr;
			if (BAD_ADDR(k) ||
			    eppnt->p_filesz > eppnt->p_memsz ||
			    eppnt->p_memsz > TASK_SIZE ||
			    TASK_SIZE - eppnt->p_memsz < k) {
				error = -ENOMEM;
				goto out_close;
			}

			/*
			 * Find the end of the file mapping for this phdr, and
			 * keep track of the largest address we see for this.
			 */
			k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
			if (k > elf_bss)
				elf_bss = k;

			/*
			 * Do the same thing for the memory mapping - between
			 * elf_bss and last_bss is the bss section.
			 */
			k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
			if (k > last_bss)
				last_bss = k;
		}
	}
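
	/*
	 * At this point (illustrative sketch):
	 *
	 *	load_addr .. elf_bss	highest file-backed byte mapped above
	 *	elf_bss   .. last_bss	the interpreter's bss, which must be
	 *				backed by zeroed anonymous memory
	 */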

	/*
	 * Now fill out the bss section.  First pad the last page up
	 * to the page boundary, and then perform a mmap to make sure
	 * that there are zero-mapped pages up to and including the
	 * last bss page.
	 */
	if (padzero(elf_bss)) {
		error = -EFAULT;
		goto out_close;
	}

	/* What we have mapped so far */
	elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);

	/* Map the last of the bss segment */
	if (last_bss > elf_bss) {
		down_write(&current->mm->mmap_sem);
		error = do_brk(elf_bss, last_bss - elf_bss);
		up_write(&current->mm->mmap_sem);
		if (BAD_ADDR(error))
			goto out_close;
	}

	*interp_load_addr = load_addr;
	error = ((unsigned long)interp_elf_ex->e_entry) + load_addr;

out_close:
	kfree(elf_phdata);
out:
	return error;
}

static unsigned long load_aout_interp(struct exec *interp_ex,
		struct file *interpreter)
{
	unsigned long text_data, elf_entry = ~0UL;
	char __user * addr;
	loff_t offset;

	current->mm->end_code = interp_ex->a_text;
	text_data = interp_ex->a_text + interp_ex->a_data;
	current->mm->end_data = text_data;
	current->mm->brk = interp_ex->a_bss + text_data;

	switch (N_MAGIC(*interp_ex)) {
	case OMAGIC:
		offset = 32;
		addr = (char __user *)0;
		break;
	case ZMAGIC:
	case QMAGIC:
		offset = N_TXTOFF(*interp_ex);
		addr = (char __user *)N_TXTADDR(*interp_ex);
		break;
	default:
		goto out;
	}

	down_write(&current->mm->mmap_sem);
	do_brk(0, text_data);
	up_write(&current->mm->mmap_sem);
	if (!interpreter->f_op || !interpreter->f_op->read)
		goto out;
	if (interpreter->f_op->read(interpreter, addr, text_data, &offset) < 0)
		goto out;
	flush_icache_range((unsigned long)addr,
	                   (unsigned long)addr + text_data);

	down_write(&current->mm->mmap_sem);
	do_brk(ELF_PAGESTART(text_data + ELF_MIN_ALIGN - 1),
		interp_ex->a_bss);
	up_write(&current->mm->mmap_sem);
	elf_entry = interp_ex->a_entry;

out:
	return elf_entry;
}

/*
 * These are the functions used to load ELF style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
 */

#define INTERPRETER_NONE 0
#define INTERPRETER_AOUT 1
#define INTERPRETER_ELF 2

#ifndef STACK_RND_MASK
#define STACK_RND_MASK 0x7ff		/* with 4K pages 8MB of VA */
#endif

static unsigned long randomize_stack_top(unsigned long stack_top)
{
	unsigned int random_variable = 0;

	if ((current->flags & PF_RANDOMIZE) &&
		!(current->personality & ADDR_NO_RANDOMIZE)) {
		random_variable = get_random_int() & STACK_RND_MASK;
		random_variable <<= PAGE_SHIFT;
	}
#ifdef CONFIG_STACK_GROWSUP
	return PAGE_ALIGN(stack_top) + random_variable;
#else
	return PAGE_ALIGN(stack_top) - random_variable;
#endif
}
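
/*
 * Worked example (assuming 4K pages, i.e. PAGE_SHIFT == 12, and the
 * default STACK_RND_MASK of 0x7ff): random_variable is a page-aligned
 * value in [0, 0x7ff << 12], so the stack top is shifted by up to
 * 8MB - 4K below (or above, with CONFIG_STACK_GROWSUP) STACK_TOP.
 */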

static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
{
	struct file *interpreter = NULL; /* to shut gcc up */
	unsigned long load_addr = 0, load_bias = 0;
	int load_addr_set = 0;
	char * elf_interpreter = NULL;
	unsigned int interpreter_type = INTERPRETER_NONE;
	unsigned char ibcs2_interpreter = 0;
	unsigned long error;
	struct elf_phdr *elf_ppnt, *elf_phdata;
	unsigned long elf_bss, elf_brk;
	int elf_exec_fileno;
	int retval, i;
	unsigned int size;
	unsigned long elf_entry, interp_load_addr = 0;
	unsigned long start_code, end_code, start_data, end_data;
	unsigned long reloc_func_desc = 0;
	char passed_fileno[6];
	struct files_struct *files;
	int executable_stack = EXSTACK_DEFAULT;
	unsigned long def_flags = 0;
	struct {
		struct elfhdr elf_ex;
		struct elfhdr interp_elf_ex;
		struct exec interp_ex;
	} *loc;

	loc = kmalloc(sizeof(*loc), GFP_KERNEL);
	if (!loc) {
		retval = -ENOMEM;
		goto out_ret;
	}

	/* Get the exec-header */
	loc->elf_ex = *((struct elfhdr *)bprm->buf);

	retval = -ENOEXEC;
	/* First of all, some simple consistency checks */
	if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
		goto out;

	if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
		goto out;
	if (!elf_check_arch(&loc->elf_ex))
		goto out;
	if (!bprm->file->f_op || !bprm->file->f_op->mmap)
		goto out;

	/* Now read in all of the header information */
	if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
		goto out;
	if (loc->elf_ex.e_phnum < 1 ||
		loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
		goto out;
	size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
	retval = -ENOMEM;
	elf_phdata = kmalloc(size, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
			     (char *)elf_phdata, size);
	if (retval != size) {
		if (retval >= 0)
			retval = -EIO;
		goto out_free_ph;
	}

	files = current->files;	/* Refcounted so ok */
	retval = unshare_files();
	if (retval < 0)
		goto out_free_ph;
	if (files == current->files) {
		put_files_struct(files);
		files = NULL;
	}

	/* exec will make our files private anyway, but for the a.out
	   loader stuff we need to do it earlier */
	retval = get_unused_fd();
	if (retval < 0)
		goto out_free_fh;
	get_file(bprm->file);
	fd_install(elf_exec_fileno = retval, bprm->file);

	elf_ppnt = elf_phdata;
	elf_bss = 0;
	elf_brk = 0;

	start_code = ~0UL;
	end_code = 0;
	start_data = 0;
	end_data = 0;

	for (i = 0; i < loc->elf_ex.e_phnum; i++) {
		if (elf_ppnt->p_type == PT_INTERP) {
			/* This is the program interpreter used for
			 * shared libraries - for now assume that this
			 * is an a.out format binary
			 */
			retval = -ENOEXEC;
			if (elf_ppnt->p_filesz > PATH_MAX ||
			    elf_ppnt->p_filesz < 2)
				goto out_free_file;

			retval = -ENOMEM;
			elf_interpreter = kmalloc(elf_ppnt->p_filesz,
						  GFP_KERNEL);
			if (!elf_interpreter)
				goto out_free_file;

			retval = kernel_read(bprm->file, elf_ppnt->p_offset,
					     elf_interpreter,
					     elf_ppnt->p_filesz);
			if (retval != elf_ppnt->p_filesz) {
				if (retval >= 0)
					retval = -EIO;
				goto out_free_interp;
			}
			/* make sure the path is NUL-terminated */
			retval = -ENOEXEC;
			if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
				goto out_free_interp;

			/* If the program interpreter is one of these two,
			 * then assume an iBCS2 image. Otherwise assume
			 * a native linux image.
			 */
			if (strcmp(elf_interpreter,"/usr/lib/libc.so.1") == 0 ||
			    strcmp(elf_interpreter,"/usr/lib/ld.so.1") == 0)
				ibcs2_interpreter = 1;

			/*
			 * The early SET_PERSONALITY here is so that the lookup
			 * for the interpreter happens in the namespace of the
			 * to-be-execed image.  SET_PERSONALITY can select an
			 * alternate root.
			 *
			 * However, SET_PERSONALITY is NOT allowed to switch
			 * this task into the new image's memory mapping
			 * policy - that is, TASK_SIZE must still evaluate to
			 * that which is appropriate to the execing application.
			 * This is because exit_mmap() needs to have TASK_SIZE
			 * evaluate to the size of the old image.
			 *
			 * So if (say) a 64-bit application is execing a 32-bit
			 * application it is the architecture's responsibility
			 * to defer changing the value of TASK_SIZE until the
			 * switch really is going to happen - do this in
			 * flush_thread().	- akpm
			 */
			SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);

			interpreter = open_exec(elf_interpreter);
			retval = PTR_ERR(interpreter);
			if (IS_ERR(interpreter))
				goto out_free_interp;

			/*
			 * If the binary is not readable then enforce
			 * mm->dumpable = 0 regardless of the interpreter's
			 * permissions.
			 */
			if (file_permission(interpreter, MAY_READ) < 0)
				bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;

			retval = kernel_read(interpreter, 0, bprm->buf,
					     BINPRM_BUF_SIZE);
			if (retval != BINPRM_BUF_SIZE) {
				if (retval >= 0)
					retval = -EIO;
				goto out_free_dentry;
			}

			/* Get the exec headers */
			loc->interp_ex = *((struct exec *)bprm->buf);
			loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
			break;
		}
		elf_ppnt++;
	}

	elf_ppnt = elf_phdata;
	for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
		if (elf_ppnt->p_type == PT_GNU_STACK) {
			if (elf_ppnt->p_flags & PF_X)
				executable_stack = EXSTACK_ENABLE_X;
			else
				executable_stack = EXSTACK_DISABLE_X;
			break;
		}

	/* Some simple consistency checks for the interpreter */
	if (elf_interpreter) {
		interpreter_type = INTERPRETER_ELF | INTERPRETER_AOUT;

		/* Now figure out which format our binary is */
		if ((N_MAGIC(loc->interp_ex) != OMAGIC) &&
		    (N_MAGIC(loc->interp_ex) != ZMAGIC) &&
		    (N_MAGIC(loc->interp_ex) != QMAGIC))
			interpreter_type = INTERPRETER_ELF;

		if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
			interpreter_type &= ~INTERPRETER_ELF;

		retval = -ELIBBAD;
		if (!interpreter_type)
			goto out_free_dentry;

		/* Make sure only one type was selected */
		if ((interpreter_type & INTERPRETER_ELF) &&
		     interpreter_type != INTERPRETER_ELF) {
			// FIXME - ratelimit this before re-enabling
			// printk(KERN_WARNING "ELF: Ambiguous type, using ELF\n");
			interpreter_type = INTERPRETER_ELF;
		}
		/* Verify the interpreter has a valid arch */
		if ((interpreter_type == INTERPRETER_ELF) &&
		    !elf_check_arch(&loc->interp_elf_ex))
			goto out_free_dentry;
	} else {
		/* Executables without an interpreter also need a personality */
		SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
	}

	/* OK, we are done with that, now set up the arg stuff,
	   and then start this sucker up */
	if ((!bprm->sh_bang) && (interpreter_type == INTERPRETER_AOUT)) {
		char *passed_p = passed_fileno;
		sprintf(passed_fileno, "%d", elf_exec_fileno);

		if (elf_interpreter) {
			retval = copy_strings_kernel(1, &passed_p, bprm);
			if (retval)
				goto out_free_dentry;
			bprm->argc++;
		}
	}

	/* Flush all traces of the currently running executable */
	retval = flush_old_exec(bprm);
	if (retval)
		goto out_free_dentry;

	/* Discard our unneeded old files struct */
	if (files) {
		put_files_struct(files);
		files = NULL;
	}

	/* OK, This is the point of no return */
	current->mm->start_data = 0;
	current->mm->end_data = 0;
	current->mm->end_code = 0;
	current->mm->mmap = NULL;
	current->flags &= ~PF_FORKNOEXEC;
	current->mm->def_flags = def_flags;

	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
	   may depend on the personality.  */
	SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
	if (elf_read_implies_exec(loc->elf_ex, executable_stack))
		current->personality |= READ_IMPLIES_EXEC;

	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
		current->flags |= PF_RANDOMIZE;
	arch_pick_mmap_layout(current->mm);

	/* Do this so that we can load the interpreter, if need be.  We will
	   change some of these later */
	current->mm->free_area_cache = current->mm->mmap_base;
	current->mm->cached_hole_size = 0;
	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
				 executable_stack);
	if (retval < 0) {
		send_sig(SIGKILL, current, 0);
		goto out_free_dentry;
	}

	current->mm->start_stack = bprm->p;

	/* Now we do a little grungy work by mmapping the ELF image into
	   the correct location in memory.  At this point, we assume that
	   the image should be loaded at a fixed address, not at a variable
	   address. */
	for (i = 0, elf_ppnt = elf_phdata;
	    i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
		int elf_prot = 0, elf_flags;
		unsigned long k, vaddr;

		if (elf_ppnt->p_type != PT_LOAD)
			continue;

		if (unlikely (elf_brk > elf_bss)) {
			unsigned long nbyte;

			/* There was a PT_LOAD segment with p_memsz > p_filesz
			   before this one. Map anonymous pages, if needed,
			   and clear the area.  */
			retval = set_brk (elf_bss + load_bias,
					  elf_brk + load_bias);
			if (retval) {
				send_sig(SIGKILL, current, 0);
				goto out_free_dentry;
			}
			nbyte = ELF_PAGEOFFSET(elf_bss);
			if (nbyte) {
				nbyte = ELF_MIN_ALIGN - nbyte;
				if (nbyte > elf_brk - elf_bss)
					nbyte = elf_brk - elf_bss;
				if (clear_user((void __user *)elf_bss +
							load_bias, nbyte)) {
					/*
					 * This bss-zeroing can fail if the ELF
					 * file specifies odd protections. So
					 * we don't check the return value
					 */
				}
			}
		}

		if (elf_ppnt->p_flags & PF_R)
			elf_prot |= PROT_READ;
		if (elf_ppnt->p_flags & PF_W)
			elf_prot |= PROT_WRITE;
		if (elf_ppnt->p_flags & PF_X)
			elf_prot |= PROT_EXEC;

		elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;

		vaddr = elf_ppnt->p_vaddr;
		if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
			elf_flags |= MAP_FIXED;
		} else if (loc->elf_ex.e_type == ET_DYN) {
			/* Try and get dynamic programs out of the way of the
			 * default mmap base, as well as whatever program they
			 * might try to exec.  This is because the brk will
			 * follow the loader, and is not movable.  */
			load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
		}

		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
				elf_prot, elf_flags);
		if (BAD_ADDR(error)) {
			send_sig(SIGKILL, current, 0);
			goto out_free_dentry;
		}

		if (!load_addr_set) {
			load_addr_set = 1;
			load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
			if (loc->elf_ex.e_type == ET_DYN) {
				load_bias += error -
				             ELF_PAGESTART(load_bias + vaddr);
				load_addr += load_bias;
				reloc_func_desc = load_bias;
			}
		}
		k = elf_ppnt->p_vaddr;
		if (k < start_code)
			start_code = k;
		if (start_data < k)
			start_data = k;

		/*
		 * Check to see if the section's size will overflow the
		 * allowed task size. Note that p_filesz must always be
		 * <= p_memsz so it is only necessary to check p_memsz.
		 */
		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
		    elf_ppnt->p_memsz > TASK_SIZE ||
		    TASK_SIZE - elf_ppnt->p_memsz < k) {
			/* set_brk can never work. Avoid overflows. */
			send_sig(SIGKILL, current, 0);
			goto out_free_dentry;
		}

		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

		if (k > elf_bss)
			elf_bss = k;
		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
			end_code = k;
		if (end_data < k)
			end_data = k;
		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
		if (k > elf_brk)
			elf_brk = k;
	}

	loc->elf_ex.e_entry += load_bias;
	elf_bss += load_bias;
	elf_brk += load_bias;
	start_code += load_bias;
	end_code += load_bias;
	start_data += load_bias;
	end_data += load_bias;

	/* Calling set_brk effectively mmaps the pages that we need
	 * for the bss and break sections.  We must do this before
	 * mapping in the interpreter, to make sure it doesn't wind
	 * up getting placed where the bss needs to go.
	 */
	retval = set_brk(elf_bss, elf_brk);
	if (retval) {
		send_sig(SIGKILL, current, 0);
		goto out_free_dentry;
	}
	if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
		send_sig(SIGSEGV, current, 0);
		retval = -EFAULT; /* Nobody gets to see this, but.. */
		goto out_free_dentry;
	}

	if (elf_interpreter) {
		if (interpreter_type == INTERPRETER_AOUT)
			elf_entry = load_aout_interp(&loc->interp_ex,
						     interpreter);
		else
			elf_entry = load_elf_interp(&loc->interp_elf_ex,
						    interpreter,
						    &interp_load_addr);
		if (BAD_ADDR(elf_entry)) {
			force_sig(SIGSEGV, current);
			retval = IS_ERR((void *)elf_entry) ?
					(int)elf_entry : -EINVAL;
			goto out_free_dentry;
		}
		reloc_func_desc = interp_load_addr;

		allow_write_access(interpreter);
		fput(interpreter);
		kfree(elf_interpreter);
	} else {
		elf_entry = loc->elf_ex.e_entry;
		if (BAD_ADDR(elf_entry)) {
			force_sig(SIGSEGV, current);
			retval = -EINVAL;
			goto out_free_dentry;
		}
	}

	kfree(elf_phdata);

	if (interpreter_type != INTERPRETER_AOUT)
		sys_close(elf_exec_fileno);

	set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
	retval = arch_setup_additional_pages(bprm, executable_stack);
	if (retval < 0) {
		send_sig(SIGKILL, current, 0);
		goto out;
	}
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

	compute_creds(bprm);
	current->flags &= ~PF_FORKNOEXEC;
	create_elf_tables(bprm, &loc->elf_ex,
			  (interpreter_type == INTERPRETER_AOUT),
			  load_addr, interp_load_addr);
	/* N.B. passed_fileno might not be initialized? */
	if (interpreter_type == INTERPRETER_AOUT)
		current->mm->arg_start += strlen(passed_fileno) + 1;
	current->mm->end_code = end_code;
	current->mm->start_code = start_code;
	current->mm->start_data = start_data;
	current->mm->end_data = end_data;
	current->mm->start_stack = bprm->p;

	if (current->personality & MMAP_PAGE_ZERO) {
		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
		   and some applications "depend" upon this behavior.
		   Since we do not have the power to recompile these, we
		   emulate the SVr4 behavior. Sigh. */
		down_write(&current->mm->mmap_sem);
		error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
				MAP_FIXED | MAP_PRIVATE, 0);
		up_write(&current->mm->mmap_sem);
	}

#ifdef ELF_PLAT_INIT
	/*
	 * The ABI may specify that certain registers be set up in special
	 * ways (on i386 %edx is the address of a DT_FINI function, for
	 * example).  In addition, it may also specify (eg, PowerPC64 ELF)
	 * that the e_entry field is the address of the function descriptor
	 * for the startup routine, rather than the address of the startup
	 * routine itself.  This macro performs whatever initialization to
	 * the regs structure is required as well as any relocations to the
	 * function descriptor entries when executing dynamically linked apps.
	 */
	ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

	start_thread(regs, elf_entry, bprm->p);
	if (unlikely(current->ptrace & PT_PTRACED)) {
		if (current->ptrace & PT_TRACE_EXEC)
			ptrace_notify((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
		else
			send_sig(SIGTRAP, current, 0);
	}
	retval = 0;
out:
	kfree(loc);
out_ret:
	return retval;

	/* error cleanup */
out_free_dentry:
	allow_write_access(interpreter);
	if (interpreter)
		fput(interpreter);
out_free_interp:
	kfree(elf_interpreter);
out_free_file:
	sys_close(elf_exec_fileno);
out_free_fh:
	if (files)
		reset_files_struct(current, files);
out_free_ph:
	kfree(elf_phdata);
	goto out;
}

/* This is really simpleminded and specialized - we are loading an
   a.out library that is given an ELF header. */
static int load_elf_library(struct file *file)
{
	struct elf_phdr *elf_phdata;
	struct elf_phdr *eppnt;
	unsigned long elf_bss, bss, len;
	int retval, error, i, j;
	struct elfhdr elf_ex;

	error = -ENOEXEC;
	retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
	if (retval != sizeof(elf_ex))
		goto out;

	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
		goto out;

	/* First of all, some simple consistency checks */
	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
	    !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
		goto out;

	/* Now read in all of the header information */

	j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
	/* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

	error = -ENOMEM;
	elf_phdata = kmalloc(j, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	eppnt = elf_phdata;
	error = -ENOEXEC;
	retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
	if (retval != j)
		goto out_free_ph;

	for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
		if ((eppnt + i)->p_type == PT_LOAD)
			j++;
	if (j != 1)
		goto out_free_ph;

	while (eppnt->p_type != PT_LOAD)
		eppnt++;

	/* Now use mmap to map the library into memory. */
	down_write(&current->mm->mmap_sem);
	error = do_mmap(file,
			ELF_PAGESTART(eppnt->p_vaddr),
			(eppnt->p_filesz +
			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
			PROT_READ | PROT_WRITE | PROT_EXEC,
			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
			(eppnt->p_offset -
			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
	up_write(&current->mm->mmap_sem);
	if (error != ELF_PAGESTART(eppnt->p_vaddr))
		goto out_free_ph;

	elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
	if (padzero(elf_bss)) {
		error = -EFAULT;
		goto out_free_ph;
	}

	len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
			    ELF_MIN_ALIGN - 1);
	bss = eppnt->p_memsz + eppnt->p_vaddr;
	if (bss > len) {
		down_write(&current->mm->mmap_sem);
		do_brk(len, bss - len);
		up_write(&current->mm->mmap_sem);
	}
	error = 0;

out_free_ph:
	kfree(elf_phdata);
out:
	return error;
}

/*
 * Note that some platforms still use traditional core dumps and not
 * the ELF core dump.  Each platform can select it as appropriate.
 */
#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)

/*
 * ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */
/*
 * These are the only things you should do on a core-file: use only these
 * functions to write out all the necessary info.
 */
static int dump_write(struct file *file, const void *addr, int nr)
{
	return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
}

static int dump_seek(struct file *file, loff_t off)
{
	if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
		if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
			return 0;
	} else {
		char *buf = (char *)get_zeroed_page(GFP_KERNEL);
		if (!buf)
			return 0;
		while (off > 0) {
			unsigned long n = off;
			if (n > PAGE_SIZE)
				n = PAGE_SIZE;
			if (!dump_write(file, buf, n)) {
				/* don't leak the zero page on a short write */
				free_page((unsigned long)buf);
				return 0;
			}
			off -= n;
		}
		free_page((unsigned long)buf);
	}
	return 1;
}

/*
 * Decide whether a segment is worth dumping; default is yes to be
 * sure (missing info is worse than too much, etc.).
 * Personally I'd include everything, and use the coredump limit...
 *
 * I think we should skip something. But I am not sure how. H.J.
 */
static int maydump(struct vm_area_struct *vma)
{
	/* The vma can be set up to tell us the answer directly.  */
	if (vma->vm_flags & VM_ALWAYSDUMP)
		return 1;

	/* Do not dump I/O mapped devices or special mappings */
	if (vma->vm_flags & (VM_IO | VM_RESERVED))
		return 0;

	/* Dump shared memory only if mapped from an anonymous file. */
	if (vma->vm_flags & VM_SHARED)
		return vma->vm_file->f_path.dentry->d_inode->i_nlink == 0;

	/* If it hasn't been written to, don't write it out */
	if (!vma->anon_vma)
		return 0;

	return 1;
}

/* An ELF note in memory */
struct memelfnote
{
	const char *name;
	int type;
	unsigned int datasz;
	void *data;
};

static int notesize(struct memelfnote *en)
{
	int sz;

	sz = sizeof(struct elf_note);
	sz += roundup(strlen(en->name) + 1, 4);
	sz += roundup(en->datasz, 4);

	return sz;
}
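
/*
 * Example (illustrative): for a note named "CORE" describing an
 * elf_prstatus, notesize() yields sizeof(struct elf_note) (3 words,
 * 12 bytes) + roundup(5, 4) == 8 for the name, plus the descriptor
 * size padded to a multiple of 4.
 */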

#define DUMP_WRITE(addr, nr, foffset)	\
	do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)

static int alignfile(struct file *file, loff_t *foffset)
{
	static const char buf[4] = { 0, };
	DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
	return 1;
}

static int writenote(struct memelfnote *men, struct file *file,
			loff_t *foffset)
{
	struct elf_note en;
	en.n_namesz = strlen(men->name) + 1;
	en.n_descsz = men->datasz;
	en.n_type = men->type;

	DUMP_WRITE(&en, sizeof(en), foffset);
	DUMP_WRITE(men->name, en.n_namesz, foffset);
	if (!alignfile(file, foffset))
		return 0;
	DUMP_WRITE(men->data, men->datasz, foffset);
	if (!alignfile(file, foffset))
		return 0;

	return 1;
}
#undef DUMP_WRITE

#define DUMP_WRITE(addr, nr)	\
	if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
		goto end_coredump;
#define DUMP_SEEK(off)	\
	if (!dump_seek(file, (off))) \
		goto end_coredump;

static void fill_elf_header(struct elfhdr *elf, int segs)
{
	memcpy(elf->e_ident, ELFMAG, SELFMAG);
	elf->e_ident[EI_CLASS] = ELF_CLASS;
	elf->e_ident[EI_DATA] = ELF_DATA;
	elf->e_ident[EI_VERSION] = EV_CURRENT;
	elf->e_ident[EI_OSABI] = ELF_OSABI;
	memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);

	elf->e_type = ET_CORE;
	elf->e_machine = ELF_ARCH;
	elf->e_version = EV_CURRENT;
	elf->e_entry = 0;
	elf->e_phoff = sizeof(struct elfhdr);
	elf->e_shoff = 0;
	elf->e_flags = ELF_CORE_EFLAGS;
	elf->e_ehsize = sizeof(struct elfhdr);
	elf->e_phentsize = sizeof(struct elf_phdr);
	elf->e_phnum = segs;
	elf->e_shentsize = 0;
	elf->e_shnum = 0;
	elf->e_shstrndx = 0;
	return;
}

static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
{
	phdr->p_type = PT_NOTE;
	phdr->p_offset = offset;
	phdr->p_vaddr = 0;
	phdr->p_paddr = 0;
	phdr->p_filesz = sz;
	phdr->p_memsz = 0;
	phdr->p_flags = 0;
	phdr->p_align = 0;
	return;
}

static void fill_note(struct memelfnote *note, const char *name, int type,
		unsigned int sz, void *data)
{
	note->name = name;
	note->type = type;
	note->datasz = sz;
	note->data = data;
	return;
}

/*
 * fill up all the fields in prstatus from the given task struct, except
 * registers which need to be filled up separately.
 */
static void fill_prstatus(struct elf_prstatus *prstatus,
		struct task_struct *p, long signr)
{
	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
	prstatus->pr_sigpend = p->pending.signal.sig[0];
	prstatus->pr_sighold = p->blocked.sig[0];
	prstatus->pr_pid = p->pid;
	prstatus->pr_ppid = p->parent->pid;
	prstatus->pr_pgrp = process_group(p);
	prstatus->pr_sid = process_session(p);
	if (thread_group_leader(p)) {
		/*
		 * This is the record for the group leader.  Add in the
		 * cumulative times of previous dead threads.  This total
		 * won't include the time of each live thread whose state
		 * is included in the core dump.  The final total reported
		 * to our parent process when it calls wait4 will include
		 * those sums as well as the little bit more time it takes
		 * this and each other thread to finish dying after the
		 * core dump synchronization phase.
		 */
		cputime_to_timeval(cputime_add(p->utime, p->signal->utime),
				   &prstatus->pr_utime);
		cputime_to_timeval(cputime_add(p->stime, p->signal->stime),
				   &prstatus->pr_stime);
	} else {
		cputime_to_timeval(p->utime, &prstatus->pr_utime);
		cputime_to_timeval(p->stime, &prstatus->pr_stime);
	}
	cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
	cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
}

static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
		       struct mm_struct *mm)
{
	unsigned int i, len;

	/* first copy the parameters from user space */
	memset(psinfo, 0, sizeof(struct elf_prpsinfo));

	len = mm->arg_end - mm->arg_start;
	if (len >= ELF_PRARGSZ)
		len = ELF_PRARGSZ-1;
	if (copy_from_user(&psinfo->pr_psargs,
		           (const char __user *)mm->arg_start, len))
		return -EFAULT;
	for (i = 0; i < len; i++)
		if (psinfo->pr_psargs[i] == 0)
			psinfo->pr_psargs[i] = ' ';
	psinfo->pr_psargs[len] = 0;

	psinfo->pr_pid = p->pid;
	psinfo->pr_ppid = p->parent->pid;
	psinfo->pr_pgrp = process_group(p);
	psinfo->pr_sid = process_session(p);

	i = p->state ? ffz(~p->state) + 1 : 0;
	psinfo->pr_state = i;
	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
	psinfo->pr_nice = task_nice(p);
	psinfo->pr_flag = p->flags;
	SET_UID(psinfo->pr_uid, p->uid);
	SET_GID(psinfo->pr_gid, p->gid);
	strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));

	return 0;
}

/* Here is the structure in which the status of each thread is captured. */
struct elf_thread_status
{
	struct list_head list;
	struct elf_prstatus prstatus;	/* NT_PRSTATUS */
	elf_fpregset_t fpu;		/* NT_PRFPREG */
	struct task_struct *thread;
#ifdef ELF_CORE_COPY_XFPREGS
	elf_fpxregset_t xfpu;		/* NT_PRXFPREG */
#endif
	struct memelfnote notes[3];
	int num_notes;
};

/*
 * In order to add the specific thread information for the elf file format,
 * we need to keep a linked list of every thread's pr_status and then create
 * a single section for them in the final core file.
 */
static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
{
	int sz = 0;
	struct task_struct *p = t->thread;
	t->num_notes = 0;

	fill_prstatus(&t->prstatus, p, signr);
	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);

	fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
		  &(t->prstatus));
	t->num_notes++;
	sz += notesize(&t->notes[0]);

	if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
								&t->fpu))) {
		fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
			  &(t->fpu));
		t->num_notes++;
		sz += notesize(&t->notes[1]);
	}

#ifdef ELF_CORE_COPY_XFPREGS
	if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
		fill_note(&t->notes[2], "LINUX", NT_PRXFPREG, sizeof(t->xfpu),
			  &t->xfpu);
		t->num_notes++;
		sz += notesize(&t->notes[2]);
	}
#endif
	return sz;
}

static struct vm_area_struct *first_vma(struct task_struct *tsk,
					struct vm_area_struct *gate_vma)
{
	struct vm_area_struct *ret = tsk->mm->mmap;

	if (ret)
		return ret;
	return gate_vma;
}

/*
 * Helper function for iterating across a vma list.  It ensures that the
 * caller will visit `gate_vma' prior to terminating the search.
 */
static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
					struct vm_area_struct *gate_vma)
{
	struct vm_area_struct *ret;

	ret = this_vma->vm_next;
	if (ret)
		return ret;
	if (this_vma == gate_vma)
		return NULL;
	return gate_vma;
}
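
/*
 * Schematic of the walk driven by first_vma()/next_vma(): the mm's vma
 * list vma1 -> vma2 -> ... -> NULL is traversed as
 * vma1 -> vma2 -> ... -> gate_vma -> NULL, so the gate vma (where one
 * exists, e.g. the x86-64 vsyscall page) is visited exactly once, last.
 */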

/*
 * Actual dumper
 *
 * This is a two-pass process; first we find the offsets of the bits,
 * and then they are actually written out.  If we run out of core limit
 * we just truncate.
 */
static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
{
#define	NUM_NOTES	6
	int has_dumped = 0;
	mm_segment_t fs;
	int segs;
	size_t size = 0;
	int i;
	struct vm_area_struct *vma, *gate_vma;
	struct elfhdr *elf = NULL;
	loff_t offset = 0, dataoff, foffset;
	unsigned long limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
	int numnote;
	struct memelfnote *notes = NULL;
	struct elf_prstatus *prstatus = NULL;	/* NT_PRSTATUS */
	struct elf_prpsinfo *psinfo = NULL;	/* NT_PRPSINFO */
	struct task_struct *g, *p;
	LIST_HEAD(thread_list);
	struct list_head *t;
	elf_fpregset_t *fpu = NULL;
#ifdef ELF_CORE_COPY_XFPREGS
	elf_fpxregset_t *xfpu = NULL;
#endif
	int thread_status_size = 0;
	elf_addr_t *auxv;

	/*
	 * We no longer stop all VM operations.
	 *
	 * This is because those processes that could possibly change map_count
	 * or the mmap / vma pages are now blocked in do_exit on current
	 * finishing this core dump.
	 *
	 * Only ptrace can touch these memory addresses, but it doesn't change
	 * the map_count or the pages allocated. So no possibility of crashing
	 * exists while dumping the mm->vm_next areas to the core file.
	 */

	/* alloc memory for large data structures: too large to be on stack */
	elf = kmalloc(sizeof(*elf), GFP_KERNEL);
	if (!elf)
		goto cleanup;
	prstatus = kmalloc(sizeof(*prstatus), GFP_KERNEL);
	if (!prstatus)
		goto cleanup;
	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
	if (!psinfo)
		goto cleanup;
	notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote), GFP_KERNEL);
	if (!notes)
		goto cleanup;
	fpu = kmalloc(sizeof(*fpu), GFP_KERNEL);
	if (!fpu)
		goto cleanup;
#ifdef ELF_CORE_COPY_XFPREGS
	xfpu = kmalloc(sizeof(*xfpu), GFP_KERNEL);
	if (!xfpu)
		goto cleanup;
#endif

	if (signr) {
		struct elf_thread_status *tmp;
		rcu_read_lock();
		do_each_thread(g,p)
			if (current->mm == p->mm && current != p) {
				tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC);
				if (!tmp) {
					rcu_read_unlock();
					goto cleanup;
				}
				tmp->thread = p;
				list_add(&tmp->list, &thread_list);
			}
		while_each_thread(g,p);
		rcu_read_unlock();
		list_for_each(t, &thread_list) {
			struct elf_thread_status *tmp;
			int sz;

			tmp = list_entry(t, struct elf_thread_status, list);
			sz = elf_dump_thread_status(signr, tmp);
			thread_status_size += sz;
		}
	}
	/* now collect the dump for the current */
	memset(prstatus, 0, sizeof(*prstatus));
	fill_prstatus(prstatus, current, signr);
	elf_core_copy_regs(&prstatus->pr_reg, regs);

	segs = current->mm->map_count;
#ifdef ELF_CORE_EXTRA_PHDRS
	segs += ELF_CORE_EXTRA_PHDRS;
#endif

	gate_vma = get_gate_vma(current);
	if (gate_vma != NULL)
		segs++;

	/* Set up header */
	fill_elf_header(elf, segs + 1);	/* including notes section */

	has_dumped = 1;
	current->flags |= PF_DUMPCORE;

	/*
	 * Set up the notes in similar form to SVR4 core dumps made
	 * with info from their /proc.
	 */

	fill_note(notes + 0, "CORE", NT_PRSTATUS, sizeof(*prstatus), prstatus);
	fill_psinfo(psinfo, current->group_leader, current->mm);
	fill_note(notes + 1, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);

	numnote = 2;

	auxv = (elf_addr_t *)current->mm->saved_auxv;

	i = 0;
	do
		i += 2;
	while (auxv[i - 2] != AT_NULL);
	fill_note(&notes[numnote++], "CORE", NT_AUXV,
		  i * sizeof(elf_addr_t), auxv);

	/* Try to dump the FPU. */
	if ((prstatus->pr_fpvalid =
	     elf_core_copy_task_fpregs(current, regs, fpu)))
		fill_note(notes + numnote++,
			  "CORE", NT_PRFPREG, sizeof(*fpu), fpu);
#ifdef ELF_CORE_COPY_XFPREGS
	if (elf_core_copy_task_xfpregs(current, xfpu))
		fill_note(notes + numnote++,
			  "LINUX", NT_PRXFPREG, sizeof(*xfpu), xfpu);
#endif

	fs = get_fs();
	set_fs(KERNEL_DS);

	DUMP_WRITE(elf, sizeof(*elf));
	offset += sizeof(*elf);				/* Elf header */
	offset += (segs + 1) * sizeof(struct elf_phdr); /* Program headers */
	foffset = offset;

	/* Write notes phdr entry */
	{
		struct elf_phdr phdr;
		int sz = 0;

		for (i = 0; i < numnote; i++)
			sz += notesize(notes + i);

		sz += thread_status_size;

#ifdef ELF_CORE_WRITE_EXTRA_NOTES
		sz += ELF_CORE_EXTRA_NOTES_SIZE;
#endif

		fill_elf_note_phdr(&phdr, sz, offset);
		offset += sz;
		DUMP_WRITE(&phdr, sizeof(phdr));
	}

	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);

	/* Write program headers for segments dump */
	for (vma = first_vma(current, gate_vma); vma != NULL;
			vma = next_vma(vma, gate_vma)) {
		struct elf_phdr phdr;
		size_t sz;

		sz = vma->vm_end - vma->vm_start;

		phdr.p_type = PT_LOAD;
		phdr.p_offset = offset;
		phdr.p_vaddr = vma->vm_start;
		phdr.p_paddr = 0;
		phdr.p_filesz = maydump(vma) ? sz : 0;
		phdr.p_memsz = sz;
		offset += phdr.p_filesz;
		phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
		if (vma->vm_flags & VM_WRITE)
			phdr.p_flags |= PF_W;
		if (vma->vm_flags & VM_EXEC)
			phdr.p_flags |= PF_X;
		phdr.p_align = ELF_EXEC_PAGESIZE;

		DUMP_WRITE(&phdr, sizeof(phdr));
	}

#ifdef ELF_CORE_WRITE_EXTRA_PHDRS
	ELF_CORE_WRITE_EXTRA_PHDRS;
#endif

	/* write out the notes section */
	for (i = 0; i < numnote; i++)
		if (!writenote(notes + i, file, &foffset))
			goto end_coredump;

#ifdef ELF_CORE_WRITE_EXTRA_NOTES
	ELF_CORE_WRITE_EXTRA_NOTES;
#endif

	/* write out the thread status notes section */
	list_for_each(t, &thread_list) {
		struct elf_thread_status *tmp =
				list_entry(t, struct elf_thread_status, list);

		for (i = 0; i < tmp->num_notes; i++)
			if (!writenote(&tmp->notes[i], file, &foffset))
				goto end_coredump;
	}

	/* Align to page */
	DUMP_SEEK(dataoff - foffset);

	for (vma = first_vma(current, gate_vma); vma != NULL;
			vma = next_vma(vma, gate_vma)) {
		unsigned long addr;

		if (!maydump(vma))
			continue;

		for (addr = vma->vm_start;
		     addr < vma->vm_end;
		     addr += PAGE_SIZE) {
			struct page *page;
			struct vm_area_struct *vma;

			if (get_user_pages(current, current->mm, addr, 1, 0, 1,
						&page, &vma) <= 0) {
				DUMP_SEEK(PAGE_SIZE);
			} else {
				if (page == ZERO_PAGE(addr)) {
					DUMP_SEEK(PAGE_SIZE);
				} else {
					void *kaddr;
					flush_cache_page(vma, addr,
							 page_to_pfn(page));
					kaddr = kmap(page);
					if ((size += PAGE_SIZE) > limit ||
					    !dump_write(file, kaddr,
					    PAGE_SIZE)) {
						kunmap(page);
						page_cache_release(page);
						goto end_coredump;
					}
					kunmap(page);
				}
				page_cache_release(page);
			}
		}
	}

#ifdef ELF_CORE_WRITE_EXTRA_DATA
	ELF_CORE_WRITE_EXTRA_DATA;
#endif

end_coredump:
	set_fs(fs);

cleanup:
	while (!list_empty(&thread_list)) {
		struct list_head *tmp = thread_list.next;
		list_del(tmp);
		kfree(list_entry(tmp, struct elf_thread_status, list));
	}

	kfree(elf);
	kfree(prstatus);
	kfree(psinfo);
	kfree(notes);
	kfree(fpu);
#ifdef ELF_CORE_COPY_XFPREGS
	kfree(xfpu);
#endif
	return has_dumped;
#undef NUM_NOTES
}

#endif		/* USE_ELF_CORE_DUMP */

static int __init init_elf_binfmt(void)
{
	return register_binfmt(&elf_format);
}

static void __exit exit_elf_binfmt(void)
{
	/* Remove the ELF loader. */
	unregister_binfmt(&elf_format);
}

core_initcall(init_elf_binfmt);
module_exit(exit_elf_binfmt);
MODULE_LICENSE("GPL");