/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines.  Information on the format may be found in the book
 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
 * Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/stat.h>
#include <linux/time.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/a.out.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/fcntl.h>
#include <linux/ptrace.h>
#include <linux/slab.h>
#include <linux/shm.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <asm/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>

static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
static int load_elf_library(struct file *);
static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *, int, int);
extern int dump_fpu(struct pt_regs *, elf_fpregset_t *);

#ifndef elf_addr_t
#define elf_addr_t unsigned long
#endif

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file);
#else
#define elf_core_dump	NULL
#endif

#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN	PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS	0
#endif

#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
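
/*
 * A quick worked example of the macros above, assuming
 * ELF_MIN_ALIGN == 4096 (0x1000):
 *
 *	ELF_PAGESTART(0x1234)  == 0x1000   (round down to page start)
 *	ELF_PAGEOFFSET(0x1234) == 0x0234   (offset within the page)
 *	ELF_PAGEALIGN(0x1234)  == 0x2000   (round up to the next page)
 */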

static struct linux_binfmt elf_format = {
		.module		= THIS_MODULE,
		.load_binary	= load_elf_binary,
		.load_shlib	= load_elf_library,
		.core_dump	= elf_core_dump,
		.min_coredump	= ELF_EXEC_PAGESIZE
};

#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
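
/*
 * Note that BAD_ADDR() also catches error returns from do_mmap()/do_brk():
 * a small negative errno such as -ENOMEM, cast to unsigned long, is a value
 * far above TASK_SIZE, so one test covers both "unmappable address" and
 * "mapping failed".
 */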

static int set_brk(unsigned long start, unsigned long end)
{
	start = ELF_PAGEALIGN(start);
	end = ELF_PAGEALIGN(end);
	if (end > start) {
		unsigned long addr;
		down_write(&current->mm->mmap_sem);
		addr = do_brk(start, end - start);
		up_write(&current->mm->mmap_sem);
		if (BAD_ADDR(addr))
			return addr;
	}
	current->mm->start_brk = current->mm->brk = end;
	return 0;
}

/* We need to explicitly zero any fractional pages
   after the data section (i.e. bss).  These would otherwise
   contain junk from the file that should not
   be in memory
 */
static int padzero(unsigned long elf_bss)
{
	unsigned long nbyte;

	nbyte = ELF_PAGEOFFSET(elf_bss);
	if (nbyte) {
		nbyte = ELF_MIN_ALIGN - nbyte;
		if (clear_user((void __user *) elf_bss, nbyte))
			return -EFAULT;
	}
	return 0;
}
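
/*
 * Example: with 4K pages, if the file-backed data ends at elf_bss ==
 * 0x0804a123, then ELF_PAGEOFFSET(elf_bss) == 0x123 and padzero() clears
 * the remaining 0x1000 - 0x123 == 0xedd bytes up to 0x0804b000, so the
 * start of the bss reads as zeroes rather than trailing file contents.
 */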

/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
	elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
	old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
	(((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
#endif
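
/*
 * For orientation, the initial stack that create_elf_tables() below builds
 * follows the SVr4/ELF ABI convention.  On a downward-growing stack it
 * looks like this (low addresses at the top):
 *
 *	sp ->	argc
 *		argv[0] ... argv[argc-1]
 *		NULL
 *		envp[0] ... envp[n-1]
 *		NULL
 *		auxv[0] ... auxv[m]	(id/value pairs, AT_NULL last)
 *		argument/environment strings, platform string, padding
 */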

static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
		int interp_aout, unsigned long load_addr,
		unsigned long interp_load_addr)
{
	unsigned long p = bprm->p;
	int argc = bprm->argc;
	int envc = bprm->envc;
	elf_addr_t __user *argv;
	elf_addr_t __user *envp;
	elf_addr_t __user *sp;
	elf_addr_t __user *u_platform;
	const char *k_platform = ELF_PLATFORM;
	int items;
	elf_addr_t *elf_info;
	int ei_index = 0;
	struct task_struct *tsk = current;

	/*
	 * If this architecture has a platform capability string, copy it
	 * to userspace.  In some cases (Sparc), this info is impossible
	 * for userspace to get any other way, in others (i386) it is
	 * merely difficult.
	 */
	u_platform = NULL;
	if (k_platform) {
		size_t len = strlen(k_platform) + 1;

		/*
		 * In some cases (e.g. Hyper-Threading), we want to avoid L1
		 * evictions by the processes running on the same package. One
		 * thing we can do is to shuffle the initial stack for them.
		 */

		p = arch_align_stack(p);

		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
		if (__copy_to_user(u_platform, k_platform, len))
			return -EFAULT;
	}

	/* Create the ELF interpreter info */
	elf_info = (elf_addr_t *)current->mm->saved_auxv;
#define NEW_AUX_ENT(id, val) \
	do { \
		elf_info[ei_index++] = id; \
		elf_info[ei_index++] = val; \
	} while (0)
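
	/*
	 * Each NEW_AUX_ENT() appends one (id, value) pair, so saved_auxv
	 * ends up as a flat array such as:
	 *
	 *	{ AT_HWCAP, hwcap, AT_PAGESZ, 4096, ..., AT_NULL, 0 }
	 *
	 * The dynamic linker walks these pairs until it hits AT_NULL.
	 */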

#ifdef ARCH_DLINFO
	/*
	 * ARCH_DLINFO must come first so PPC can do its special alignment of
	 * AUXV.
	 */
	ARCH_DLINFO;
#endif
	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
	NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
	NEW_AUX_ENT(AT_BASE, interp_load_addr);
	NEW_AUX_ENT(AT_FLAGS, 0);
	NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
	NEW_AUX_ENT(AT_UID, tsk->uid);
	NEW_AUX_ENT(AT_EUID, tsk->euid);
	NEW_AUX_ENT(AT_GID, tsk->gid);
	NEW_AUX_ENT(AT_EGID, tsk->egid);
	NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
	if (k_platform) {
		NEW_AUX_ENT(AT_PLATFORM,
			    (elf_addr_t)(unsigned long)u_platform);
	}
	if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
		NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
	}
#undef NEW_AUX_ENT
	/* AT_NULL is zero; clear the rest too */
	memset(&elf_info[ei_index], 0,
	       sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);

	/* And advance past the AT_NULL entry.  */
	ei_index += 2;

	sp = STACK_ADD(p, ei_index);

	items = (argc + 1) + (envc + 1);
	if (interp_aout) {
		items += 3; /* a.out interpreters require argv & envp too */
	} else {
		items += 1; /* ELF interpreters only put argc on the stack */
	}
	bprm->p = STACK_ROUND(sp, items);

	/* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
	bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
	sp = (elf_addr_t __user *)bprm->p;
#endif

	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
	if (__put_user(argc, sp++))
		return -EFAULT;
	if (interp_aout) {
		argv = sp + 2;
		envp = argv + argc + 1;
		__put_user((elf_addr_t)(unsigned long)argv, sp++);
		__put_user((elf_addr_t)(unsigned long)envp, sp++);
	} else {
		argv = sp;
		envp = argv + argc + 1;
	}

	/* Populate argv and envp */
	p = current->mm->arg_end = current->mm->arg_start;
	while (argc-- > 0) {
		size_t len;
		__put_user((elf_addr_t)p, argv++);
		len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES);
		if (!len || len > PAGE_SIZE*MAX_ARG_PAGES)
			return 0;
		p += len;
	}
	if (__put_user(0, argv))
		return -EFAULT;
	current->mm->arg_end = current->mm->env_start = p;
	while (envc-- > 0) {
		size_t len;
		__put_user((elf_addr_t)p, envp++);
		len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES);
		if (!len || len > PAGE_SIZE*MAX_ARG_PAGES)
			return 0;
		p += len;
	}
	if (__put_user(0, envp))
		return -EFAULT;
	current->mm->env_end = p;

	/* Put the elf_info on the stack in the right place.  */
	sp = (elf_addr_t __user *)envp + 1;
	if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
		return -EFAULT;
	return 0;
}

#ifndef elf_map

static unsigned long elf_map(struct file *filep, unsigned long addr,
		struct elf_phdr *eppnt, int prot, int type)
{
	unsigned long map_addr;
	unsigned long pageoffset = ELF_PAGEOFFSET(eppnt->p_vaddr);

	down_write(&current->mm->mmap_sem);
	/* mmap() will return -EINVAL if given a zero size, but a
	 * segment with zero filesize is perfectly valid */
	if (eppnt->p_filesz + pageoffset)
		map_addr = do_mmap(filep, ELF_PAGESTART(addr),
				   eppnt->p_filesz + pageoffset, prot, type,
				   eppnt->p_offset - pageoffset);
	else
		map_addr = ELF_PAGESTART(addr);
	up_write(&current->mm->mmap_sem);
	return map_addr;
}

#endif /* !elf_map */
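
/*
 * The offset arithmetic in elf_map() keeps file offsets and virtual
 * addresses congruent modulo the page size, as the ELF spec requires.
 * E.g. a segment with p_vaddr == 0x08048123 and p_offset == 0x123 is
 * mapped at ELF_PAGESTART(addr) == 0x08048000 from file offset 0x0, so
 * the byte at file offset 0x123 lands exactly at address 0x08048123.
 */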

/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header */

static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
		struct file *interpreter, unsigned long *interp_load_addr)
{
	struct elf_phdr *elf_phdata;
	struct elf_phdr *eppnt;
	unsigned long load_addr = 0;
	int load_addr_set = 0;
	unsigned long last_bss = 0, elf_bss = 0;
	unsigned long error = ~0UL;
	int retval, i, size;

	/* First of all, some simple consistency checks */
	if (interp_elf_ex->e_type != ET_EXEC &&
	    interp_elf_ex->e_type != ET_DYN)
		goto out;
	if (!elf_check_arch(interp_elf_ex))
		goto out;
	if (!interpreter->f_op || !interpreter->f_op->mmap)
		goto out;

	/*
	 * If the size of this structure has changed, then punt, since
	 * we will be doing the wrong thing.
	 */
	if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
		goto out;
	if (interp_elf_ex->e_phnum < 1 ||
		interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
		goto out;

	/* Now read in all of the header information */
	size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
	if (size > ELF_MIN_ALIGN)
		goto out;
	elf_phdata = kmalloc(size, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
			     (char *)elf_phdata, size);
	error = -EIO;
	if (retval != size) {
		if (retval < 0)
			error = retval;
		goto out_close;
	}

	eppnt = elf_phdata;
	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
		if (eppnt->p_type == PT_LOAD) {
			int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
			int elf_prot = 0;
			unsigned long vaddr = 0;
			unsigned long k, map_addr;

			if (eppnt->p_flags & PF_R)
				elf_prot = PROT_READ;
			if (eppnt->p_flags & PF_W)
				elf_prot |= PROT_WRITE;
			if (eppnt->p_flags & PF_X)
				elf_prot |= PROT_EXEC;
			vaddr = eppnt->p_vaddr;
			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
				elf_type |= MAP_FIXED;

			map_addr = elf_map(interpreter, load_addr + vaddr,
					   eppnt, elf_prot, elf_type);
			error = map_addr;
			if (BAD_ADDR(map_addr))
				goto out_close;

			if (!load_addr_set &&
			    interp_elf_ex->e_type == ET_DYN) {
				load_addr = map_addr - ELF_PAGESTART(vaddr);
				load_addr_set = 1;
			}

			/*
			 * Check to see if the segment's size will overflow the
			 * allowed task size. Note that p_filesz must always be
			 * <= p_memsz so it's only necessary to check p_memsz.
			 */
			k = load_addr + eppnt->p_vaddr;
			if (BAD_ADDR(k) ||
			    eppnt->p_filesz > eppnt->p_memsz ||
			    eppnt->p_memsz > TASK_SIZE ||
			    TASK_SIZE - eppnt->p_memsz < k) {
				error = -ENOMEM;
				goto out_close;
			}

			/*
			 * Find the end of the file mapping for this phdr, and
			 * keep track of the largest address we see for this.
			 */
			k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
			if (k > elf_bss)
				elf_bss = k;

			/*
			 * Do the same thing for the memory mapping - between
			 * elf_bss and last_bss is the bss section.
			 */
			k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
			if (k > last_bss)
				last_bss = k;
		}
	}

	/*
	 * Now fill out the bss section.  First pad the last page up
	 * to the page boundary, and then perform a mmap to make sure
	 * that there are zero-mapped pages up to and including the
	 * last bss page.
	 */
	if (padzero(elf_bss)) {
		error = -EFAULT;
		goto out_close;
	}

	/* What we have mapped so far */
	elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);

	/* Map the last of the bss segment */
	if (last_bss > elf_bss) {
		down_write(&current->mm->mmap_sem);
		error = do_brk(elf_bss, last_bss - elf_bss);
		up_write(&current->mm->mmap_sem);
		if (BAD_ADDR(error))
			goto out_close;
	}

	*interp_load_addr = load_addr;
	error = ((unsigned long)interp_elf_ex->e_entry) + load_addr;

out_close:
	kfree(elf_phdata);
out:
	return error;
}

static unsigned long load_aout_interp(struct exec *interp_ex,
		struct file *interpreter)
{
	unsigned long text_data, elf_entry = ~0UL;
	char __user *addr;
	loff_t offset;

	current->mm->end_code = interp_ex->a_text;
	text_data = interp_ex->a_text + interp_ex->a_data;
	current->mm->end_data = text_data;
	current->mm->brk = interp_ex->a_bss + text_data;

	switch (N_MAGIC(*interp_ex)) {
	case OMAGIC:
		offset = 32;
		addr = (char __user *)0;
		break;
	case ZMAGIC:
	case QMAGIC:
		offset = N_TXTOFF(*interp_ex);
		addr = (char __user *)N_TXTADDR(*interp_ex);
		break;
	default:
		goto out;
	}

	down_write(&current->mm->mmap_sem);
	do_brk(0, text_data);
	up_write(&current->mm->mmap_sem);
	if (!interpreter->f_op || !interpreter->f_op->read)
		goto out;
	if (interpreter->f_op->read(interpreter, addr, text_data, &offset) < 0)
		goto out;
	flush_icache_range((unsigned long)addr,
	                   (unsigned long)addr + text_data);

	down_write(&current->mm->mmap_sem);
	do_brk(ELF_PAGESTART(text_data + ELF_MIN_ALIGN - 1),
		interp_ex->a_bss);
	up_write(&current->mm->mmap_sem);
	elf_entry = interp_ex->a_entry;

out:
	return elf_entry;
}

/*
 * These are the functions used to load ELF style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
 */

#define INTERPRETER_NONE 0
#define INTERPRETER_AOUT 1
#define INTERPRETER_ELF 2

#ifndef STACK_RND_MASK
#define STACK_RND_MASK 0x7ff		/* with 4K pages 8MB of VA */
#endif

static unsigned long randomize_stack_top(unsigned long stack_top)
{
	unsigned int random_variable = 0;

	if (current->flags & PF_RANDOMIZE) {
		random_variable = get_random_int() & STACK_RND_MASK;
		random_variable <<= PAGE_SHIFT;
	}
#ifdef CONFIG_STACK_GROWSUP
	return PAGE_ALIGN(stack_top) + random_variable;
#else
	return PAGE_ALIGN(stack_top) - random_variable;
#endif
}
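
/*
 * With the default STACK_RND_MASK of 0x7ff and PAGE_SHIFT == 12, the
 * random offset is a page-aligned value in [0, 0x7ff000], i.e. up to
 * 2047 pages (just under 8MB) of stack-top randomization, subtracted
 * from (or, with CONFIG_STACK_GROWSUP, added to) the page-aligned
 * STACK_TOP.
 */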

static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
{
	struct file *interpreter = NULL; /* to shut gcc up */
	unsigned long load_addr = 0, load_bias = 0;
	int load_addr_set = 0;
	char *elf_interpreter = NULL;
	unsigned int interpreter_type = INTERPRETER_NONE;
	unsigned char ibcs2_interpreter = 0;
	unsigned long error;
	struct elf_phdr *elf_ppnt, *elf_phdata;
	unsigned long elf_bss, elf_brk;
	int elf_exec_fileno;
	int retval, i;
	unsigned int size;
	unsigned long elf_entry, interp_load_addr = 0;
	unsigned long start_code, end_code, start_data, end_data;
	unsigned long reloc_func_desc = 0;
	char passed_fileno[6];
	struct files_struct *files;
	int have_pt_gnu_stack, executable_stack = EXSTACK_DEFAULT;
	unsigned long def_flags = 0;
	struct {
		struct elfhdr elf_ex;
		struct elfhdr interp_elf_ex;
		struct exec interp_ex;
	} *loc;

	loc = kmalloc(sizeof(*loc), GFP_KERNEL);
	if (!loc) {
		retval = -ENOMEM;
		goto out_ret;
	}

	/* Get the exec-header */
	loc->elf_ex = *((struct elfhdr *)bprm->buf);

	retval = -ENOEXEC;
	/* First of all, some simple consistency checks */
	if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
		goto out;

	if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
		goto out;
	if (!elf_check_arch(&loc->elf_ex))
		goto out;
	if (!bprm->file->f_op || !bprm->file->f_op->mmap)
		goto out;

	/* Now read in all of the header information */
	if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
		goto out;
	if (loc->elf_ex.e_phnum < 1 ||
		loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
		goto out;
	size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
	retval = -ENOMEM;
	elf_phdata = kmalloc(size, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
			     (char *)elf_phdata, size);
	if (retval != size) {
		if (retval >= 0)
			retval = -EIO;
		goto out_free_ph;
	}

	files = current->files;	/* Refcounted so ok */
	retval = unshare_files();
	if (retval < 0)
		goto out_free_ph;
	if (files == current->files) {
		put_files_struct(files);
		files = NULL;
	}

	/* exec will make our files private anyway, but for the a.out
	   loader stuff we need to do it earlier */
	retval = get_unused_fd();
	if (retval < 0)
		goto out_free_fh;
	get_file(bprm->file);
	fd_install(elf_exec_fileno = retval, bprm->file);

	elf_ppnt = elf_phdata;
	elf_bss = 0;
	elf_brk = 0;

	start_code = ~0UL;
	end_code = 0;
	start_data = 0;
	end_data = 0;

	for (i = 0; i < loc->elf_ex.e_phnum; i++) {
		if (elf_ppnt->p_type == PT_INTERP) {
			/* This is the program interpreter used for
			 * shared libraries - for now assume that this
			 * is an a.out format binary
			 */
			retval = -ENOEXEC;
			if (elf_ppnt->p_filesz > PATH_MAX ||
			    elf_ppnt->p_filesz < 2)
				goto out_free_file;

			retval = -ENOMEM;
			elf_interpreter = kmalloc(elf_ppnt->p_filesz,
						  GFP_KERNEL);
			if (!elf_interpreter)
				goto out_free_file;

			retval = kernel_read(bprm->file, elf_ppnt->p_offset,
					     elf_interpreter,
					     elf_ppnt->p_filesz);
			if (retval != elf_ppnt->p_filesz) {
				if (retval >= 0)
					retval = -EIO;
				goto out_free_interp;
			}
			/* make sure path is NULL terminated */
			retval = -ENOEXEC;
			if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
				goto out_free_interp;

			/* If the program interpreter is one of these two,
			 * then assume an iBCS2 image. Otherwise assume
			 * a native linux image.
			 */
			if (strcmp(elf_interpreter, "/usr/lib/libc.so.1") == 0 ||
			    strcmp(elf_interpreter, "/usr/lib/ld.so.1") == 0)
				ibcs2_interpreter = 1;

			/*
			 * The early SET_PERSONALITY here is so that the lookup
			 * for the interpreter happens in the namespace of the
			 * to-be-execed image.  SET_PERSONALITY can select an
			 * alternate root.
			 *
			 * However, SET_PERSONALITY is NOT allowed to switch
			 * this task into the new image's memory mapping
			 * policy - that is, TASK_SIZE must still evaluate to
			 * that which is appropriate to the execing application.
			 * This is because exit_mmap() needs to have TASK_SIZE
			 * evaluate to the size of the old image.
			 *
			 * So if (say) a 64-bit application is execing a 32-bit
			 * application it is the architecture's responsibility
			 * to defer changing the value of TASK_SIZE until the
			 * switch really is going to happen - do this in
			 * flush_thread().	- akpm
			 */
			SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);

			interpreter = open_exec(elf_interpreter);
			retval = PTR_ERR(interpreter);
			if (IS_ERR(interpreter))
				goto out_free_interp;
			retval = kernel_read(interpreter, 0, bprm->buf,
					     BINPRM_BUF_SIZE);
			if (retval != BINPRM_BUF_SIZE) {
				if (retval >= 0)
					retval = -EIO;
				goto out_free_dentry;
			}

			/* Get the exec headers */
			loc->interp_ex = *((struct exec *)bprm->buf);
			loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
			break;
		}
		elf_ppnt++;
	}

	elf_ppnt = elf_phdata;
	for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
		if (elf_ppnt->p_type == PT_GNU_STACK) {
			if (elf_ppnt->p_flags & PF_X)
				executable_stack = EXSTACK_ENABLE_X;
			else
				executable_stack = EXSTACK_DISABLE_X;
			break;
		}
	have_pt_gnu_stack = (i < loc->elf_ex.e_phnum);

	/* Some simple consistency checks for the interpreter */
	if (elf_interpreter) {
		interpreter_type = INTERPRETER_ELF | INTERPRETER_AOUT;

		/* Now figure out which format our binary is */
		if ((N_MAGIC(loc->interp_ex) != OMAGIC) &&
		    (N_MAGIC(loc->interp_ex) != ZMAGIC) &&
		    (N_MAGIC(loc->interp_ex) != QMAGIC))
			interpreter_type = INTERPRETER_ELF;

		if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
			interpreter_type &= ~INTERPRETER_ELF;

		retval = -ELIBBAD;
		if (!interpreter_type)
			goto out_free_dentry;

		/* Make sure only one type was selected */
		if ((interpreter_type & INTERPRETER_ELF) &&
		     interpreter_type != INTERPRETER_ELF) {
			// FIXME - ratelimit this before re-enabling
			// printk(KERN_WARNING "ELF: Ambiguous type, using ELF\n");
			interpreter_type = INTERPRETER_ELF;
		}
		/* Verify the interpreter has a valid arch */
		if ((interpreter_type == INTERPRETER_ELF) &&
		    !elf_check_arch(&loc->interp_elf_ex))
			goto out_free_dentry;
	} else {
		/* Executables without an interpreter also need a personality */
		SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
	}

	/* OK, we are done with that, now set up the arg stuff,
	   and then start this sucker up */
	if ((!bprm->sh_bang) && (interpreter_type == INTERPRETER_AOUT)) {
		char *passed_p = passed_fileno;
		sprintf(passed_fileno, "%d", elf_exec_fileno);

		if (elf_interpreter) {
			retval = copy_strings_kernel(1, &passed_p, bprm);
			if (retval)
				goto out_free_dentry;
			bprm->argc++;
		}
	}

	/* Flush all traces of the currently running executable */
	retval = flush_old_exec(bprm);
	if (retval)
		goto out_free_dentry;

	/* Discard our unneeded old files struct */
	if (files) {
		put_files_struct(files);
		files = NULL;
	}

	/* OK, this is the point of no return */
	current->mm->start_data = 0;
	current->mm->end_data = 0;
	current->mm->end_code = 0;
	current->mm->mmap = NULL;
	current->flags &= ~PF_FORKNOEXEC;
	current->mm->def_flags = def_flags;

	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
	   may depend on the personality.  */
	SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
	if (elf_read_implies_exec(loc->elf_ex, executable_stack))
		current->personality |= READ_IMPLIES_EXEC;

	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
		current->flags |= PF_RANDOMIZE;
	arch_pick_mmap_layout(current->mm);

	/* Do this so that we can load the interpreter, if need be.  We will
	   change some of these later */
	current->mm->free_area_cache = current->mm->mmap_base;
	current->mm->cached_hole_size = 0;
	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
				 executable_stack);
	if (retval < 0) {
		send_sig(SIGKILL, current, 0);
		goto out_free_dentry;
	}

	current->mm->start_stack = bprm->p;

	/* Now we do a little grungy work by mmaping the ELF image into
	   the correct location in memory.  At this point, we assume that
	   the image should be loaded at a fixed address, not at a variable
	   address. */
	for (i = 0, elf_ppnt = elf_phdata;
	    i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
		int elf_prot = 0, elf_flags;
		unsigned long k, vaddr;

		if (elf_ppnt->p_type != PT_LOAD)
			continue;

		if (unlikely(elf_brk > elf_bss)) {
			unsigned long nbyte;

			/* There was a PT_LOAD segment with p_memsz > p_filesz
			   before this one. Map anonymous pages, if needed,
			   and clear the area.  */
			retval = set_brk(elf_bss + load_bias,
					 elf_brk + load_bias);
			if (retval) {
				send_sig(SIGKILL, current, 0);
				goto out_free_dentry;
			}
			nbyte = ELF_PAGEOFFSET(elf_bss);
			if (nbyte) {
				nbyte = ELF_MIN_ALIGN - nbyte;
				if (nbyte > elf_brk - elf_bss)
					nbyte = elf_brk - elf_bss;
				if (clear_user((void __user *)elf_bss +
							load_bias, nbyte)) {
					/*
					 * This bss-zeroing can fail if the ELF
					 * file specifies odd protections. So
					 * we don't check the return value
					 */
				}
			}
		}

		if (elf_ppnt->p_flags & PF_R)
			elf_prot |= PROT_READ;
		if (elf_ppnt->p_flags & PF_W)
			elf_prot |= PROT_WRITE;
		if (elf_ppnt->p_flags & PF_X)
			elf_prot |= PROT_EXEC;

		elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;

		vaddr = elf_ppnt->p_vaddr;
		if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
			elf_flags |= MAP_FIXED;
		} else if (loc->elf_ex.e_type == ET_DYN) {
			/* Try and get dynamic programs out of the way of the
			 * default mmap base, as well as whatever program they
			 * might try to exec.  This is because the brk will
			 * follow the loader, and is not movable.  */
			load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
		}
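
		/*
		 * For example (hypothetical numbers, 4K pages): if
		 * ELF_ET_DYN_BASE were 0x40000000 and the first PT_LOAD
		 * had p_vaddr == 0x1234, then load_bias ==
		 * ELF_PAGESTART(0x40000000 - 0x1234) == 0x3fffe000, and
		 * the segment gets mapped at load_bias + vaddr ==
		 * 0x3ffff234, preserving the page offset 0x234.
		 */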

		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
				elf_prot, elf_flags);
		if (BAD_ADDR(error)) {
			send_sig(SIGKILL, current, 0);
			goto out_free_dentry;
		}

		if (!load_addr_set) {
			load_addr_set = 1;
			load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
			if (loc->elf_ex.e_type == ET_DYN) {
				load_bias += error -
				             ELF_PAGESTART(load_bias + vaddr);
				load_addr += load_bias;
				reloc_func_desc = load_bias;
			}
		}
		k = elf_ppnt->p_vaddr;
		if (k < start_code)
			start_code = k;
		if (start_data < k)
			start_data = k;

		/*
		 * Check to see if the segment's size will overflow the
		 * allowed task size. Note that p_filesz must always be
		 * <= p_memsz so it is only necessary to check p_memsz.
		 */
		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
		    elf_ppnt->p_memsz > TASK_SIZE ||
		    TASK_SIZE - elf_ppnt->p_memsz < k) {
			/* set_brk can never work. Avoid overflows. */
			send_sig(SIGKILL, current, 0);
			goto out_free_dentry;
		}

		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

		if (k > elf_bss)
			elf_bss = k;
		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
			end_code = k;
		if (end_data < k)
			end_data = k;
		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
		if (k > elf_brk)
			elf_brk = k;
	}

	loc->elf_ex.e_entry += load_bias;
	elf_bss += load_bias;
	elf_brk += load_bias;
	start_code += load_bias;
	end_code += load_bias;
	start_data += load_bias;
	end_data += load_bias;

	/* Calling set_brk effectively mmaps the pages that we need
	 * for the bss and break sections.  We must do this before
	 * mapping in the interpreter, to make sure it doesn't wind
	 * up getting placed where the bss needs to go.
	 */
	retval = set_brk(elf_bss, elf_brk);
	if (retval) {
		send_sig(SIGKILL, current, 0);
		goto out_free_dentry;
	}
	if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
		send_sig(SIGSEGV, current, 0);
		retval = -EFAULT; /* Nobody gets to see this, but.. */
		goto out_free_dentry;
	}

	if (elf_interpreter) {
		if (interpreter_type == INTERPRETER_AOUT)
			elf_entry = load_aout_interp(&loc->interp_ex,
						     interpreter);
		else
			elf_entry = load_elf_interp(&loc->interp_elf_ex,
						    interpreter,
						    &interp_load_addr);
		if (BAD_ADDR(elf_entry)) {
			force_sig(SIGSEGV, current);
			retval = IS_ERR((void *)elf_entry) ?
					(int)elf_entry : -EINVAL;
			goto out_free_dentry;
		}
		reloc_func_desc = interp_load_addr;

		allow_write_access(interpreter);
		fput(interpreter);
		kfree(elf_interpreter);
	} else {
		elf_entry = loc->elf_ex.e_entry;
		if (BAD_ADDR(elf_entry)) {
			force_sig(SIGSEGV, current);
			retval = -EINVAL;
			goto out_free_dentry;
		}
	}

	kfree(elf_phdata);

	if (interpreter_type != INTERPRETER_AOUT)
		sys_close(elf_exec_fileno);

	set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
	retval = arch_setup_additional_pages(bprm, executable_stack);
	if (retval < 0) {
		send_sig(SIGKILL, current, 0);
		goto out;
	}
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

	compute_creds(bprm);
	current->flags &= ~PF_FORKNOEXEC;
	create_elf_tables(bprm, &loc->elf_ex,
			  (interpreter_type == INTERPRETER_AOUT),
			  load_addr, interp_load_addr);
	/* N.B. passed_fileno might not be initialized? */
	if (interpreter_type == INTERPRETER_AOUT)
		current->mm->arg_start += strlen(passed_fileno) + 1;
	current->mm->end_code = end_code;
	current->mm->start_code = start_code;
	current->mm->start_data = start_data;
	current->mm->end_data = end_data;
	current->mm->start_stack = bprm->p;

	if (current->personality & MMAP_PAGE_ZERO) {
		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
		   and some applications "depend" upon this behavior.
		   Since we do not have the power to recompile these, we
		   emulate the SVr4 behavior. Sigh. */
		down_write(&current->mm->mmap_sem);
		error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
				MAP_FIXED | MAP_PRIVATE, 0);
		up_write(&current->mm->mmap_sem);
	}

#ifdef ELF_PLAT_INIT
	/*
	 * The ABI may specify that certain registers be set up in special
	 * ways (on i386 %edx is the address of a DT_FINI function, for
	 * example).  In addition, it may also specify (e.g., PowerPC64 ELF)
	 * that the e_entry field is the address of the function descriptor
	 * for the startup routine, rather than the address of the startup
	 * routine itself.  This macro performs whatever initialization to
	 * the regs structure is required as well as any relocations to the
	 * function descriptor entries when executing dynamically linked apps.
	 */
	ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

	start_thread(regs, elf_entry, bprm->p);
	if (unlikely(current->ptrace & PT_PTRACED)) {
		if (current->ptrace & PT_TRACE_EXEC)
			ptrace_notify((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
		else
			send_sig(SIGTRAP, current, 0);
	}
	retval = 0;
out:
	kfree(loc);
out_ret:
	return retval;

	/* error cleanup */
out_free_dentry:
	allow_write_access(interpreter);
	if (interpreter)
		fput(interpreter);
out_free_interp:
	kfree(elf_interpreter);
out_free_file:
	sys_close(elf_exec_fileno);
out_free_fh:
	if (files) {
		put_files_struct(current->files);
		current->files = files;
	}
out_free_ph:
	kfree(elf_phdata);
	goto out;
}

/* This is really simpleminded and specialized - we are loading an
   a.out library that is given an ELF header. */
static int load_elf_library(struct file *file)
{
	struct elf_phdr *elf_phdata;
	struct elf_phdr *eppnt;
	unsigned long elf_bss, bss, len;
	int retval, error, i, j;
	struct elfhdr elf_ex;

	error = -ENOEXEC;
	retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
	if (retval != sizeof(elf_ex))
		goto out;

	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
		goto out;

	/* First of all, some simple consistency checks */
	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
	    !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
		goto out;

	/* Now read in all of the header information */

	j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
	/* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

	error = -ENOMEM;
	elf_phdata = kmalloc(j, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	eppnt = elf_phdata;
	error = -ENOEXEC;
	retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
	if (retval != j)
		goto out_free_ph;

	for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
		if ((eppnt + i)->p_type == PT_LOAD)
			j++;
	if (j != 1)
		goto out_free_ph;

	while (eppnt->p_type != PT_LOAD)
		eppnt++;

	/* Now use mmap to map the library into memory. */
	down_write(&current->mm->mmap_sem);
	error = do_mmap(file,
			ELF_PAGESTART(eppnt->p_vaddr),
			(eppnt->p_filesz +
			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
			PROT_READ | PROT_WRITE | PROT_EXEC,
			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
			(eppnt->p_offset -
			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
	up_write(&current->mm->mmap_sem);
	if (error != ELF_PAGESTART(eppnt->p_vaddr))
		goto out_free_ph;

	elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
	if (padzero(elf_bss)) {
		error = -EFAULT;
		goto out_free_ph;
	}

	len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
			    ELF_MIN_ALIGN - 1);
	bss = eppnt->p_memsz + eppnt->p_vaddr;
	if (bss > len) {
		down_write(&current->mm->mmap_sem);
		do_brk(len, bss - len);
		up_write(&current->mm->mmap_sem);
	}
	error = 0;

out_free_ph:
	kfree(elf_phdata);
out:
	return error;
}

/*
 * Note that some platforms still use traditional core dumps and not
 * the ELF core dump.  Each platform can select it as appropriate.
 */
#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)

/*
 * ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */
/*
 * These are the only things you should do on a core-file: use only these
 * functions to write out all the necessary info.
 */
static int dump_write(struct file *file, const void *addr, int nr)
{
	return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
}

static int dump_seek(struct file *file, loff_t off)
{
	if (file->f_op->llseek) {
		if (file->f_op->llseek(file, off, 0) != off)
			return 0;
	} else
		file->f_pos = off;
	return 1;
}

/*
 * Decide whether a segment is worth dumping; default is yes to be
 * sure (missing info is worse than too much; etc).
 * Personally I'd include everything, and use the coredump limit...
 *
 * I think we should skip something. But I am not sure how. H.J.
 */
static int maydump(struct vm_area_struct *vma)
{
	/* Do not dump I/O mapped devices or special mappings */
	if (vma->vm_flags & (VM_IO | VM_RESERVED))
		return 0;

	/* Dump shared memory only if mapped from an anonymous file. */
	if (vma->vm_flags & VM_SHARED)
		return vma->vm_file->f_dentry->d_inode->i_nlink == 0;

	/* If it hasn't been written to, don't write it out */
	if (!vma->anon_vma)
		return 0;

	return 1;
}

/* An ELF note in memory */
struct memelfnote
{
	const char *name;
	int type;
	unsigned int datasz;
	void *data;
};

static int notesize(struct memelfnote *en)
{
	int sz;

	sz = sizeof(struct elf_note);
	sz += roundup(strlen(en->name) + 1, 4);
	sz += roundup(en->datasz, 4);

	return sz;
}
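
/*
 * On disk each note record is the fixed-size struct elf_note header
 * (n_namesz, n_descsz, n_type) followed by the name and the descriptor
 * data, each padded to a 4-byte boundary.  For example, an NT_PRSTATUS
 * note named "CORE" has n_namesz == 5, so the name occupies 8 bytes after
 * padding, and notesize() returns
 * sizeof(struct elf_note) + 8 + roundup(datasz, 4).
 */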

#define DUMP_WRITE(addr, nr)	\
	do { if (!dump_write(file, (addr), (nr))) return 0; } while(0)
#define DUMP_SEEK(off)	\
	do { if (!dump_seek(file, (off))) return 0; } while(0)

static int writenote(struct memelfnote *men, struct file *file)
{
	struct elf_note en;

	en.n_namesz = strlen(men->name) + 1;
	en.n_descsz = men->datasz;
	en.n_type = men->type;

	DUMP_WRITE(&en, sizeof(en));
	DUMP_WRITE(men->name, en.n_namesz);
	/* XXX - cast from long long to long to avoid need for libgcc.a */
	DUMP_SEEK(roundup((unsigned long)file->f_pos, 4));	/* XXX */
	DUMP_WRITE(men->data, men->datasz);
	DUMP_SEEK(roundup((unsigned long)file->f_pos, 4));	/* XXX */

	return 1;
}
#undef DUMP_WRITE
#undef DUMP_SEEK

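/*
 * The variants below differ from the ones above in two ways: they are used
 * inside elf_core_dump(), where returning 0 is not an option, so they jump
 * to end_coredump instead; and DUMP_WRITE additionally accounts each write
 * against the RLIMIT_CORE limit via the local `size` and `limit` variables.
 */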
#define DUMP_WRITE(addr, nr)	\
	if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
		goto end_coredump;
#define DUMP_SEEK(off)	\
	if (!dump_seek(file, (off))) \
		goto end_coredump;

static void fill_elf_header(struct elfhdr *elf, int segs)
{
	memcpy(elf->e_ident, ELFMAG, SELFMAG);
	elf->e_ident[EI_CLASS] = ELF_CLASS;
	elf->e_ident[EI_DATA] = ELF_DATA;
	elf->e_ident[EI_VERSION] = EV_CURRENT;
	elf->e_ident[EI_OSABI] = ELF_OSABI;
	memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);

	elf->e_type = ET_CORE;
	elf->e_machine = ELF_ARCH;
	elf->e_version = EV_CURRENT;
	elf->e_entry = 0;
	elf->e_phoff = sizeof(struct elfhdr);
	elf->e_shoff = 0;
	elf->e_flags = ELF_CORE_EFLAGS;
	elf->e_ehsize = sizeof(struct elfhdr);
	elf->e_phentsize = sizeof(struct elf_phdr);
	elf->e_phnum = segs;
	elf->e_shentsize = 0;
	elf->e_shnum = 0;
	elf->e_shstrndx = 0;
	return;
}

static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, off_t offset)
{
	phdr->p_type = PT_NOTE;
	phdr->p_offset = offset;
	phdr->p_vaddr = 0;
	phdr->p_paddr = 0;
	phdr->p_filesz = sz;
	phdr->p_memsz = 0;
	phdr->p_flags = 0;
	phdr->p_align = 0;
	return;
}

static void fill_note(struct memelfnote *note, const char *name, int type,
		unsigned int sz, void *data)
{
	note->name = name;
	note->type = type;
	note->datasz = sz;
	note->data = data;
	return;
}

/*
 * fill up all the fields in prstatus from the given task struct, except
 * registers which need to be filled up separately.
 */
static void fill_prstatus(struct elf_prstatus *prstatus,
		struct task_struct *p, long signr)
{
	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
	prstatus->pr_sigpend = p->pending.signal.sig[0];
	prstatus->pr_sighold = p->blocked.sig[0];
	prstatus->pr_pid = p->pid;
	prstatus->pr_ppid = p->parent->pid;
	prstatus->pr_pgrp = process_group(p);
	prstatus->pr_sid = p->signal->session;
	if (thread_group_leader(p)) {
		/*
		 * This is the record for the group leader.  Add in the
		 * cumulative times of previous dead threads.  This total
		 * won't include the time of each live thread whose state
		 * is included in the core dump.  The final total reported
		 * to our parent process when it calls wait4 will include
		 * those sums as well as the little bit more time it takes
		 * this and each other thread to finish dying after the
		 * core dump synchronization phase.
		 */
		cputime_to_timeval(cputime_add(p->utime, p->signal->utime),
				   &prstatus->pr_utime);
		cputime_to_timeval(cputime_add(p->stime, p->signal->stime),
				   &prstatus->pr_stime);
	} else {
		cputime_to_timeval(p->utime, &prstatus->pr_utime);
		cputime_to_timeval(p->stime, &prstatus->pr_stime);
	}
	cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
	cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
}

static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
		       struct mm_struct *mm)
{
	unsigned int i, len;

	/* first copy the parameters from user space */
	memset(psinfo, 0, sizeof(struct elf_prpsinfo));

	len = mm->arg_end - mm->arg_start;
	if (len >= ELF_PRARGSZ)
		len = ELF_PRARGSZ-1;
	if (copy_from_user(&psinfo->pr_psargs,
		           (const char __user *)mm->arg_start, len))
		return -EFAULT;
	for (i = 0; i < len; i++)
		if (psinfo->pr_psargs[i] == 0)
			psinfo->pr_psargs[i] = ' ';
	psinfo->pr_psargs[len] = 0;

	psinfo->pr_pid = p->pid;
	psinfo->pr_ppid = p->parent->pid;
	psinfo->pr_pgrp = process_group(p);
	psinfo->pr_sid = p->signal->session;

	i = p->state ? ffz(~p->state) + 1 : 0;
	psinfo->pr_state = i;
	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
	psinfo->pr_nice = task_nice(p);
	psinfo->pr_flag = p->flags;
	SET_UID(psinfo->pr_uid, p->uid);
	SET_GID(psinfo->pr_gid, p->gid);
	strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));

	return 0;
}

/* Here is the structure in which status of each thread is captured. */
struct elf_thread_status
{
	struct list_head list;
	struct elf_prstatus prstatus;	/* NT_PRSTATUS */
	elf_fpregset_t fpu;		/* NT_PRFPREG */
	struct task_struct *thread;
#ifdef ELF_CORE_COPY_XFPREGS
	elf_fpxregset_t xfpu;		/* NT_PRXFPREG */
#endif
	struct memelfnote notes[3];
	int num_notes;
};

/*
 * In order to add the specific thread information for the elf file format,
 * we need to keep a linked list of every thread's pr_status and then create
 * a single section for them in the final core file.
 */
static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
{
	int sz = 0;
	struct task_struct *p = t->thread;
	t->num_notes = 0;

	fill_prstatus(&t->prstatus, p, signr);
	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);

	fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
		  &(t->prstatus));
	t->num_notes++;
	sz += notesize(&t->notes[0]);

	if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
								&t->fpu))) {
		fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
			  &(t->fpu));
		t->num_notes++;
		sz += notesize(&t->notes[1]);
	}

#ifdef ELF_CORE_COPY_XFPREGS
	if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
		fill_note(&t->notes[2], "LINUX", NT_PRXFPREG, sizeof(t->xfpu),
			  &t->xfpu);
		t->num_notes++;
		sz += notesize(&t->notes[2]);
	}
#endif
	return sz;
}

/*
 * Actual dumper
 *
 * This is a two-pass process; first we find the offsets of the bits,
 * and then they are actually written out.  If we run out of core limit
 * we just truncate.
 */
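/*
 * The resulting core file has this layout, with the note data and the
 * dumped segments located by the offsets computed in the first pass:
 *
 *	ELF header (ET_CORE)
 *	program headers: one PT_NOTE, then one PT_LOAD per vma
 *	note data (prstatus, psinfo, auxv, fpu, per-thread notes)
 *	page-aligned memory contents of each dumped vma
 */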
static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
{
#define	NUM_NOTES	6
	int has_dumped = 0;
	mm_segment_t fs;
	int segs;
	size_t size = 0;
	int i;
	struct vm_area_struct *vma;
	struct elfhdr *elf = NULL;
	off_t offset = 0, dataoff;
	unsigned long limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
	int numnote;
	struct memelfnote *notes = NULL;
	struct elf_prstatus *prstatus = NULL;	/* NT_PRSTATUS */
	struct elf_prpsinfo *psinfo = NULL;	/* NT_PRPSINFO */
	struct task_struct *g, *p;
	LIST_HEAD(thread_list);
	struct list_head *t;
	elf_fpregset_t *fpu = NULL;
#ifdef ELF_CORE_COPY_XFPREGS
	elf_fpxregset_t *xfpu = NULL;
#endif
	int thread_status_size = 0;
	elf_addr_t *auxv;

	/*
	 * We no longer stop all VM operations.
	 *
	 * This is because those processes that could possibly change map_count
	 * or the mmap / vma pages are now blocked in do_exit on current
	 * finishing this core dump.
	 *
	 * Only ptrace can touch these memory addresses, but it doesn't change
	 * the map_count or the pages allocated. So no possibility of crashing
	 * exists while dumping the mm->vm_next areas to the core file.
	 */

	/* alloc memory for large data structures: too large to be on stack */
	elf = kmalloc(sizeof(*elf), GFP_KERNEL);
	if (!elf)
		goto cleanup;
	prstatus = kmalloc(sizeof(*prstatus), GFP_KERNEL);
	if (!prstatus)
		goto cleanup;
	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
	if (!psinfo)
		goto cleanup;
	notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote), GFP_KERNEL);
	if (!notes)
		goto cleanup;
	fpu = kmalloc(sizeof(*fpu), GFP_KERNEL);
	if (!fpu)
		goto cleanup;
#ifdef ELF_CORE_COPY_XFPREGS
	xfpu = kmalloc(sizeof(*xfpu), GFP_KERNEL);
	if (!xfpu)
		goto cleanup;
#endif

	if (signr) {
		struct elf_thread_status *tmp;
		read_lock(&tasklist_lock);
		do_each_thread(g,p)
			if (current->mm == p->mm && current != p) {
				tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC);
				if (!tmp) {
					read_unlock(&tasklist_lock);
					goto cleanup;
				}
				INIT_LIST_HEAD(&tmp->list);
				tmp->thread = p;
				list_add(&tmp->list, &thread_list);
			}
		while_each_thread(g,p);
		read_unlock(&tasklist_lock);
		list_for_each(t, &thread_list) {
			struct elf_thread_status *tmp;
			int sz;

			tmp = list_entry(t, struct elf_thread_status, list);
			sz = elf_dump_thread_status(signr, tmp);
			thread_status_size += sz;
		}
	}
	/* now collect the dump for the current */
	memset(prstatus, 0, sizeof(*prstatus));
	fill_prstatus(prstatus, current, signr);
	elf_core_copy_regs(&prstatus->pr_reg, regs);

	segs = current->mm->map_count;
#ifdef ELF_CORE_EXTRA_PHDRS
	segs += ELF_CORE_EXTRA_PHDRS;
#endif

	/* Set up header */
	fill_elf_header(elf, segs + 1);	/* including notes section */

	has_dumped = 1;
	current->flags |= PF_DUMPCORE;

	/*
	 * Set up the notes in similar form to SVR4 core dumps made
	 * with info from their /proc.
	 */

	fill_note(notes + 0, "CORE", NT_PRSTATUS, sizeof(*prstatus), prstatus);
	fill_psinfo(psinfo, current->group_leader, current->mm);
	fill_note(notes + 1, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);

	numnote = 2;

	auxv = (elf_addr_t *)current->mm->saved_auxv;

	i = 0;
	do
		i += 2;
	while (auxv[i - 2] != AT_NULL);
	fill_note(&notes[numnote++], "CORE", NT_AUXV,
		  i * sizeof(elf_addr_t), auxv);

	/* Try to dump the FPU. */
	if ((prstatus->pr_fpvalid =
	     elf_core_copy_task_fpregs(current, regs, fpu)))
		fill_note(notes + numnote++,
			  "CORE", NT_PRFPREG, sizeof(*fpu), fpu);
#ifdef ELF_CORE_COPY_XFPREGS
	if (elf_core_copy_task_xfpregs(current, xfpu))
		fill_note(notes + numnote++,
			  "LINUX", NT_PRXFPREG, sizeof(*xfpu), xfpu);
#endif

	fs = get_fs();
	set_fs(KERNEL_DS);

	DUMP_WRITE(elf, sizeof(*elf));
	offset += sizeof(*elf);				/* Elf header */
	offset += (segs+1) * sizeof(struct elf_phdr);	/* Program headers */

	/* Write notes phdr entry */
	{
		struct elf_phdr phdr;
		int sz = 0;

		for (i = 0; i < numnote; i++)
			sz += notesize(notes + i);

		sz += thread_status_size;

		fill_elf_note_phdr(&phdr, sz, offset);
		offset += sz;
		DUMP_WRITE(&phdr, sizeof(phdr));
	}

	/* Page-align dumped data */
	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);

	/* Write program headers for segments dump */
	for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
		struct elf_phdr phdr;
		size_t sz;

		sz = vma->vm_end - vma->vm_start;

		phdr.p_type = PT_LOAD;
		phdr.p_offset = offset;
		phdr.p_vaddr = vma->vm_start;
		phdr.p_paddr = 0;
		phdr.p_filesz = maydump(vma) ? sz : 0;
		phdr.p_memsz = sz;
		offset += phdr.p_filesz;
		phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
		if (vma->vm_flags & VM_WRITE)
			phdr.p_flags |= PF_W;
		if (vma->vm_flags & VM_EXEC)
			phdr.p_flags |= PF_X;
		phdr.p_align = ELF_EXEC_PAGESIZE;

		DUMP_WRITE(&phdr, sizeof(phdr));
	}

#ifdef ELF_CORE_WRITE_EXTRA_PHDRS
	ELF_CORE_WRITE_EXTRA_PHDRS;
#endif

	/* write out the notes section */
	for (i = 0; i < numnote; i++)
		if (!writenote(notes + i, file))
			goto end_coredump;

	/* write out the thread status notes section */
	list_for_each(t, &thread_list) {
		struct elf_thread_status *tmp =
				list_entry(t, struct elf_thread_status, list);

		for (i = 0; i < tmp->num_notes; i++)
			if (!writenote(&tmp->notes[i], file))
				goto end_coredump;
	}

	DUMP_SEEK(dataoff);

	for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
		unsigned long addr;

		if (!maydump(vma))
			continue;

		for (addr = vma->vm_start;
		     addr < vma->vm_end;
		     addr += PAGE_SIZE) {
			struct page *page;
			struct vm_area_struct *vma;

			if (get_user_pages(current, current->mm, addr, 1, 0, 1,
						&page, &vma) <= 0) {
				DUMP_SEEK(file->f_pos + PAGE_SIZE);
			} else {
				if (page == ZERO_PAGE(addr)) {
					DUMP_SEEK(file->f_pos + PAGE_SIZE);
				} else {
					void *kaddr;
					flush_cache_page(vma, addr,
							 page_to_pfn(page));
					kaddr = kmap(page);
					if ((size += PAGE_SIZE) > limit ||
					    !dump_write(file, kaddr,
					    PAGE_SIZE)) {
						kunmap(page);
						page_cache_release(page);
						goto end_coredump;
					}
					kunmap(page);
				}
				page_cache_release(page);
			}
		}
	}

#ifdef ELF_CORE_WRITE_EXTRA_DATA
	ELF_CORE_WRITE_EXTRA_DATA;
#endif

	if ((off_t)file->f_pos != offset) {
		/* Sanity check */
		printk(KERN_WARNING
		       "elf_core_dump: file->f_pos (%ld) != offset (%ld)\n",
		       (off_t)file->f_pos, offset);
	}

end_coredump:
	set_fs(fs);

cleanup:
	while (!list_empty(&thread_list)) {
		struct list_head *tmp = thread_list.next;
		list_del(tmp);
		kfree(list_entry(tmp, struct elf_thread_status, list));
	}

	kfree(elf);
	kfree(prstatus);
	kfree(psinfo);
	kfree(notes);
	kfree(fpu);
#ifdef ELF_CORE_COPY_XFPREGS
	kfree(xfpu);
#endif
	return has_dumped;
#undef NUM_NOTES
}

#endif		/* USE_ELF_CORE_DUMP */

static int __init init_elf_binfmt(void)
{
	return register_binfmt(&elf_format);
}

static void __exit exit_elf_binfmt(void)
{
	/* Remove the ELF loader. */
	unregister_binfmt(&elf_format);
}

core_initcall(init_elf_binfmt);
module_exit(exit_elf_binfmt);
MODULE_LICENSE("GPL");