1 /*
2  * linux/fs/binfmt_elf.c
3  *
4  * These are the functions used to load ELF format executables as used
5  * on SVr4 machines.  Information on the format may be found in the book
6  * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
7  * Tools".
8  *
9  * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
10  */
11 
12 #include <linux/module.h>
13 #include <linux/kernel.h>
14 #include <linux/fs.h>
15 #include <linux/stat.h>
16 #include <linux/time.h>
17 #include <linux/mm.h>
18 #include <linux/mman.h>
19 #include <linux/a.out.h>
20 #include <linux/errno.h>
21 #include <linux/signal.h>
22 #include <linux/binfmts.h>
23 #include <linux/string.h>
24 #include <linux/file.h>
25 #include <linux/fcntl.h>
26 #include <linux/ptrace.h>
27 #include <linux/slab.h>
28 #include <linux/shm.h>
29 #include <linux/personality.h>
30 #include <linux/elfcore.h>
31 #include <linux/init.h>
32 #include <linux/highuid.h>
33 #include <linux/smp.h>
34 #include <linux/smp_lock.h>
35 #include <linux/compiler.h>
36 #include <linux/highmem.h>
37 #include <linux/pagemap.h>
38 #include <linux/security.h>
39 #include <linux/syscalls.h>
40 #include <linux/random.h>
41 #include <linux/elf.h>
42 #include <asm/uaccess.h>
43 #include <asm/param.h>
44 #include <asm/page.h>
45 
46 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
47 static int load_elf_library(struct file *);
48 static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int);
49 extern int dump_fpu (struct pt_regs *, elf_fpregset_t *);
50 
51 #ifndef elf_addr_t
52 #define elf_addr_t unsigned long
53 #endif
54 
55 /*
56  * If we don't support core dumping, then supply a NULL so we
57  * don't even try.
58  */
59 #if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
60 static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file);
61 #else
62 #define elf_core_dump	NULL
63 #endif
64 
65 #if ELF_EXEC_PAGESIZE > PAGE_SIZE
66 #define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
67 #else
68 #define ELF_MIN_ALIGN	PAGE_SIZE
69 #endif
70 
71 #ifndef ELF_CORE_EFLAGS
72 #define ELF_CORE_EFLAGS	0
73 #endif
74 
75 #define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
76 #define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
77 #define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
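/*
 * A worked example of these macros, assuming ELF_MIN_ALIGN is 4096
 * (0x1000) and an illustrative address of 0x1234:
 *
 *	ELF_PAGESTART(0x1234)  == 0x1000	round down to the page start
 *	ELF_PAGEOFFSET(0x1234) == 0x234		offset within the page
 *	ELF_PAGEALIGN(0x1234)  == 0x2000	round up to the next boundary
 */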
78 
79 static struct linux_binfmt elf_format = {
80 		.module		= THIS_MODULE,
81 		.load_binary	= load_elf_binary,
82 		.load_shlib	= load_elf_library,
83 		.core_dump	= elf_core_dump,
84 		.min_coredump	= ELF_EXEC_PAGESIZE
85 };
86 
87 #define BAD_ADDR(x) ((unsigned long)(x) > TASK_SIZE)
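/*
 * BAD_ADDR() doubles as an error check: a -errno value returned by
 * do_mmap()/do_brk(), viewed as an unsigned long, is far above
 * TASK_SIZE, so the one comparison catches both failed mappings and
 * mappings that landed above the allowed task size.
 */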
88 
89 static int set_brk(unsigned long start, unsigned long end)
90 {
91 	start = ELF_PAGEALIGN(start);
92 	end = ELF_PAGEALIGN(end);
93 	if (end > start) {
94 		unsigned long addr;
95 		down_write(&current->mm->mmap_sem);
96 		addr = do_brk(start, end - start);
97 		up_write(&current->mm->mmap_sem);
98 		if (BAD_ADDR(addr))
99 			return addr;
100 	}
101 	current->mm->start_brk = current->mm->brk = end;
102 	return 0;
103 }
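/*
 * For example (assuming 4K pages, with illustrative addresses),
 * set_brk(0x0804a123, 0x0804c456) page-aligns both ends, maps the gap
 * with do_brk(0x0804b000, 0x2000) and records 0x0804d000 as the new
 * start_brk/brk.
 */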
104 
105 /* We need to explicitly zero any fractional pages
106    after the data section (i.e. bss).  Otherwise they
107    would contain junk from the file that should not
108    be in memory.
109  */
110 static int padzero(unsigned long elf_bss)
111 {
112 	unsigned long nbyte;
113 
114 	nbyte = ELF_PAGEOFFSET(elf_bss);
115 	if (nbyte) {
116 		nbyte = ELF_MIN_ALIGN - nbyte;
117 		if (clear_user((void __user *) elf_bss, nbyte))
118 			return -EFAULT;
119 	}
120 	return 0;
121 }
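/*
 * For example (again assuming 4K pages), a bss starting at
 * elf_bss == 0x0804a123 has ELF_PAGEOFFSET 0x123, so padzero() clears
 * the remaining 0x1000 - 0x123 == 0xedd bytes up to the 0x0804b000
 * page boundary.
 */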
122 
123 /* Let's use some macros to make this stack manipulation a little clearer */
124 #ifdef CONFIG_STACK_GROWSUP
125 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
126 #define STACK_ROUND(sp, items) \
127 	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
128 #define STACK_ALLOC(sp, len) ({ \
129 	elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
130 	old_sp; })
131 #else
132 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
133 #define STACK_ROUND(sp, items) \
134 	(((unsigned long) (sp - items)) &~ 15UL)
135 #define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
136 #endif
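/*
 * A sketch of the stack image that create_elf_tables() below lays out
 * for the usual downward-growing stack, lowest address first:
 *
 *	argc
 *	argv[0] ... argv[argc-1], NULL
 *	envp[0] ... envp[envc-1], NULL
 *	auxv (AT_* id/value pairs), terminated by AT_NULL
 *	...
 *	argument and environment strings, platform string
 */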
137 
138 static int
139 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
140 		int interp_aout, unsigned long load_addr,
141 		unsigned long interp_load_addr)
142 {
143 	unsigned long p = bprm->p;
144 	int argc = bprm->argc;
145 	int envc = bprm->envc;
146 	elf_addr_t __user *argv;
147 	elf_addr_t __user *envp;
148 	elf_addr_t __user *sp;
149 	elf_addr_t __user *u_platform;
150 	const char *k_platform = ELF_PLATFORM;
151 	int items;
152 	elf_addr_t *elf_info;
153 	int ei_index = 0;
154 	struct task_struct *tsk = current;
155 
156 	/*
157 	 * If this architecture has a platform capability string, copy it
158 	 * to userspace.  In some cases (Sparc), this info is impossible
159 	 * for userspace to get any other way, in others (i386) it is
160 	 * merely difficult.
161 	 */
162 	u_platform = NULL;
163 	if (k_platform) {
164 		size_t len = strlen(k_platform) + 1;
165 
166 		/*
167 		 * In some cases (e.g. Hyper-Threading), we want to avoid L1
168 		 * evictions by the processes running on the same package. One
169 		 * thing we can do is to shuffle the initial stack for them.
170 		 */
171 
172 		p = arch_align_stack(p);
173 
174 		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
175 		if (__copy_to_user(u_platform, k_platform, len))
176 			return -EFAULT;
177 	}
178 
179 	/* Create the ELF interpreter info */
180 	elf_info = (elf_addr_t *)current->mm->saved_auxv;
181 #define NEW_AUX_ENT(id, val) \
182 	do { \
183 		elf_info[ei_index++] = id; \
184 		elf_info[ei_index++] = val; \
185 	} while (0)
186 
187 #ifdef ARCH_DLINFO
188 	/*
189 	 * ARCH_DLINFO must come first so PPC can do its special alignment of
190 	 * AUXV.
191 	 */
192 	ARCH_DLINFO;
193 #endif
194 	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
195 	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
196 	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
197 	NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
198 	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
199 	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
200 	NEW_AUX_ENT(AT_BASE, interp_load_addr);
201 	NEW_AUX_ENT(AT_FLAGS, 0);
202 	NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
203 	NEW_AUX_ENT(AT_UID, tsk->uid);
204 	NEW_AUX_ENT(AT_EUID, tsk->euid);
205 	NEW_AUX_ENT(AT_GID, tsk->gid);
206 	NEW_AUX_ENT(AT_EGID, tsk->egid);
207  	NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
208 	if (k_platform) {
209 		NEW_AUX_ENT(AT_PLATFORM,
210 			    (elf_addr_t)(unsigned long)u_platform);
211 	}
212 	if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
213 		NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
214 	}
215 #undef NEW_AUX_ENT
216 	/* AT_NULL is zero; clear the rest too */
217 	memset(&elf_info[ei_index], 0,
218 	       sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
219 
220 	/* And advance past the AT_NULL entry.  */
221 	ei_index += 2;
222 
223 	sp = STACK_ADD(p, ei_index);
224 
225 	items = (argc + 1) + (envc + 1);
226 	if (interp_aout) {
227 		items += 3; /* a.out interpreters require argv & envp too */
228 	} else {
229 		items += 1; /* ELF interpreters only put argc on the stack */
230 	}
231 	bprm->p = STACK_ROUND(sp, items);
232 
233 	/* Point sp at the lowest address on the stack */
234 #ifdef CONFIG_STACK_GROWSUP
235 	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
236 	bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
237 #else
238 	sp = (elf_addr_t __user *)bprm->p;
239 #endif
240 
241 	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
242 	if (__put_user(argc, sp++))
243 		return -EFAULT;
244 	if (interp_aout) {
245 		argv = sp + 2;
246 		envp = argv + argc + 1;
247 		__put_user((elf_addr_t)(unsigned long)argv, sp++);
248 		__put_user((elf_addr_t)(unsigned long)envp, sp++);
249 	} else {
250 		argv = sp;
251 		envp = argv + argc + 1;
252 	}
253 
254 	/* Populate argv and envp */
255 	p = current->mm->arg_end = current->mm->arg_start;
256 	while (argc-- > 0) {
257 		size_t len;
258 		__put_user((elf_addr_t)p, argv++);
259 		len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES);
260 		if (!len || len > PAGE_SIZE*MAX_ARG_PAGES)
261 			return 0;
262 		p += len;
263 	}
264 	if (__put_user(0, argv))
265 		return -EFAULT;
266 	current->mm->arg_end = current->mm->env_start = p;
267 	while (envc-- > 0) {
268 		size_t len;
269 		__put_user((elf_addr_t)p, envp++);
270 		len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES);
271 		if (!len || len > PAGE_SIZE*MAX_ARG_PAGES)
272 			return 0;
273 		p += len;
274 	}
275 	if (__put_user(0, envp))
276 		return -EFAULT;
277 	current->mm->env_end = p;
278 
279 	/* Put the elf_info on the stack in the right place.  */
280 	sp = (elf_addr_t __user *)envp + 1;
281 	if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
282 		return -EFAULT;
283 	return 0;
284 }
285 
286 #ifndef elf_map
287 
288 static unsigned long elf_map(struct file *filep, unsigned long addr,
289 		struct elf_phdr *eppnt, int prot, int type)
290 {
291 	unsigned long map_addr;
292 	unsigned long pageoffset = ELF_PAGEOFFSET(eppnt->p_vaddr);
293 
294 	down_write(&current->mm->mmap_sem);
295 	/* mmap() will return -EINVAL if given a zero size, but a
296 	 * segment with zero filesize is perfectly valid */
297 	if (eppnt->p_filesz + pageoffset)
298 		map_addr = do_mmap(filep, ELF_PAGESTART(addr),
299 				   eppnt->p_filesz + pageoffset, prot, type,
300 				   eppnt->p_offset - pageoffset);
301 	else
302 		map_addr = ELF_PAGESTART(addr);
303 	up_write(&current->mm->mmap_sem);
304 	return(map_addr);
305 }
306 
307 #endif /* !elf_map */
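/*
 * elf_map() relies on the ELF invariant that p_vaddr and p_offset are
 * congruent modulo the page size.  For example (4K pages, illustrative
 * values), a segment with p_vaddr == 0x08048123, p_offset == 0x123 and
 * p_filesz == 0x100 is mapped at 0x08048000 with length 0x223 from
 * file offset 0.
 */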
308 
309 /* This is much more generalized than the library routine read function,
310    so we keep this separate.  Technically the library read function
311    is only provided so that we can read a.out libraries that have
312    an ELF header */
313 
314 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
315 		struct file *interpreter, unsigned long *interp_load_addr)
316 {
317 	struct elf_phdr *elf_phdata;
318 	struct elf_phdr *eppnt;
319 	unsigned long load_addr = 0;
320 	int load_addr_set = 0;
321 	unsigned long last_bss = 0, elf_bss = 0;
322 	unsigned long error = ~0UL;
323 	int retval, i, size;
324 
325 	/* First of all, some simple consistency checks */
326 	if (interp_elf_ex->e_type != ET_EXEC &&
327 	    interp_elf_ex->e_type != ET_DYN)
328 		goto out;
329 	if (!elf_check_arch(interp_elf_ex))
330 		goto out;
331 	if (!interpreter->f_op || !interpreter->f_op->mmap)
332 		goto out;
333 
334 	/*
335 	 * If the size of this structure has changed, then punt, since
336 	 * we will be doing the wrong thing.
337 	 */
338 	if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
339 		goto out;
340 	if (interp_elf_ex->e_phnum < 1 ||
341 		interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
342 		goto out;
343 
344 	/* Now read in all of the header information */
345 	size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
346 	if (size > ELF_MIN_ALIGN)
347 		goto out;
348 	elf_phdata = kmalloc(size, GFP_KERNEL);
349 	if (!elf_phdata)
350 		goto out;
351 
352 	retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
353 			     (char *)elf_phdata,size);
354 	error = -EIO;
355 	if (retval != size) {
356 		if (retval < 0)
357 			error = retval;
358 		goto out_close;
359 	}
360 
361 	eppnt = elf_phdata;
362 	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
363 		if (eppnt->p_type == PT_LOAD) {
364 			int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
365 			int elf_prot = 0;
366 			unsigned long vaddr = 0;
367 			unsigned long k, map_addr;
368 
369 			if (eppnt->p_flags & PF_R)
370 		    		elf_prot = PROT_READ;
371 			if (eppnt->p_flags & PF_W)
372 				elf_prot |= PROT_WRITE;
373 			if (eppnt->p_flags & PF_X)
374 				elf_prot |= PROT_EXEC;
375 			vaddr = eppnt->p_vaddr;
376 			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
377 				elf_type |= MAP_FIXED;
378 
379 			map_addr = elf_map(interpreter, load_addr + vaddr,
380 					   eppnt, elf_prot, elf_type);
381 			error = map_addr;
382 			if (BAD_ADDR(map_addr))
383 				goto out_close;
384 
385 			if (!load_addr_set &&
386 			    interp_elf_ex->e_type == ET_DYN) {
387 				load_addr = map_addr - ELF_PAGESTART(vaddr);
388 				load_addr_set = 1;
389 			}
390 
391 			/*
392 			 * Check to see if this segment's size will overflow the
393 			 * allowed task size. Note that p_filesz must always be
394 			 * <= p_memsz so it's only necessary to check p_memsz.
395 			 */
396 			k = load_addr + eppnt->p_vaddr;
397 			if (k > TASK_SIZE ||
398 			    eppnt->p_filesz > eppnt->p_memsz ||
399 			    eppnt->p_memsz > TASK_SIZE ||
400 			    TASK_SIZE - eppnt->p_memsz < k) {
401 				error = -ENOMEM;
402 				goto out_close;
403 			}
404 
405 			/*
406 			 * Find the end of the file mapping for this phdr, and
407 			 * keep track of the largest address we see for this.
408 			 */
409 			k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
410 			if (k > elf_bss)
411 				elf_bss = k;
412 
413 			/*
414 			 * Do the same thing for the memory mapping - between
415 			 * elf_bss and last_bss is the bss section.
416 			 */
417 			k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
418 			if (k > last_bss)
419 				last_bss = k;
420 		}
421 	}
422 
423 	/*
424 	 * Now fill out the bss section.  First pad the last page up
425 	 * to the page boundary, and then perform a mmap to make sure
426 	 * that there are zero-mapped pages up to and including the
427 	 * last bss page.
428 	 */
429 	if (padzero(elf_bss)) {
430 		error = -EFAULT;
431 		goto out_close;
432 	}
433 
434 	/* What we have mapped so far */
435 	elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
436 
437 	/* Map the last of the bss segment */
438 	if (last_bss > elf_bss) {
439 		down_write(&current->mm->mmap_sem);
440 		error = do_brk(elf_bss, last_bss - elf_bss);
441 		up_write(&current->mm->mmap_sem);
442 		if (BAD_ADDR(error))
443 			goto out_close;
444 	}
445 
446 	*interp_load_addr = load_addr;
447 	error = ((unsigned long)interp_elf_ex->e_entry) + load_addr;
448 
449 out_close:
450 	kfree(elf_phdata);
451 out:
452 	return error;
453 }
454 
455 static unsigned long load_aout_interp(struct exec *interp_ex,
456 		struct file *interpreter)
457 {
458 	unsigned long text_data, elf_entry = ~0UL;
459 	char __user * addr;
460 	loff_t offset;
461 
462 	current->mm->end_code = interp_ex->a_text;
463 	text_data = interp_ex->a_text + interp_ex->a_data;
464 	current->mm->end_data = text_data;
465 	current->mm->brk = interp_ex->a_bss + text_data;
466 
467 	switch (N_MAGIC(*interp_ex)) {
468 	case OMAGIC:
469 		offset = 32;
470 		addr = (char __user *)0;
471 		break;
472 	case ZMAGIC:
473 	case QMAGIC:
474 		offset = N_TXTOFF(*interp_ex);
475 		addr = (char __user *)N_TXTADDR(*interp_ex);
476 		break;
477 	default:
478 		goto out;
479 	}
480 
481 	down_write(&current->mm->mmap_sem);
482 	do_brk(0, text_data);
483 	up_write(&current->mm->mmap_sem);
484 	if (!interpreter->f_op || !interpreter->f_op->read)
485 		goto out;
486 	if (interpreter->f_op->read(interpreter, addr, text_data, &offset) < 0)
487 		goto out;
488 	flush_icache_range((unsigned long)addr,
489 	                   (unsigned long)addr + text_data);
490 
491 	down_write(&current->mm->mmap_sem);
492 	do_brk(ELF_PAGESTART(text_data + ELF_MIN_ALIGN - 1),
493 		interp_ex->a_bss);
494 	up_write(&current->mm->mmap_sem);
495 	elf_entry = interp_ex->a_entry;
496 
497 out:
498 	return elf_entry;
499 }
500 
501 /*
502  * These are the functions used to load ELF style executables and shared
503  * libraries.  There is no binary dependent code anywhere else.
504  */
505 
506 #define INTERPRETER_NONE 0
507 #define INTERPRETER_AOUT 1
508 #define INTERPRETER_ELF 2
509 
510 #ifndef STACK_RND_MASK
511 #define STACK_RND_MASK 0x7ff		/* with 4K pages 8MB of VA */
512 #endif
513 
514 static unsigned long randomize_stack_top(unsigned long stack_top)
515 {
516 	unsigned int random_variable = 0;
517 
518 	if (current->flags & PF_RANDOMIZE) {
519 		random_variable = get_random_int() & STACK_RND_MASK;
520 		random_variable <<= PAGE_SHIFT;
521 	}
522 #ifdef CONFIG_STACK_GROWSUP
523 	return PAGE_ALIGN(stack_top) + random_variable;
524 #else
525 	return PAGE_ALIGN(stack_top) - random_variable;
526 #endif
527 }
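/*
 * With the default STACK_RND_MASK of 0x7ff and 4K pages this shifts
 * the stack top by up to 0x7ff << 12 bytes (2048 possible page
 * offsets), i.e. just under the 8MB of VA noted above.
 */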
528 
529 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
530 {
531 	struct file *interpreter = NULL; /* to shut gcc up */
532  	unsigned long load_addr = 0, load_bias = 0;
533 	int load_addr_set = 0;
534 	char * elf_interpreter = NULL;
535 	unsigned int interpreter_type = INTERPRETER_NONE;
536 	unsigned char ibcs2_interpreter = 0;
537 	unsigned long error;
538 	struct elf_phdr *elf_ppnt, *elf_phdata;
539 	unsigned long elf_bss, elf_brk;
540 	int elf_exec_fileno;
541 	int retval, i;
542 	unsigned int size;
543 	unsigned long elf_entry, interp_load_addr = 0;
544 	unsigned long start_code, end_code, start_data, end_data;
545 	unsigned long reloc_func_desc = 0;
546 	char passed_fileno[6];
547 	struct files_struct *files;
548 	int have_pt_gnu_stack, executable_stack = EXSTACK_DEFAULT;
549 	unsigned long def_flags = 0;
550 	struct {
551 		struct elfhdr elf_ex;
552 		struct elfhdr interp_elf_ex;
553   		struct exec interp_ex;
554 	} *loc;
555 
556 	loc = kmalloc(sizeof(*loc), GFP_KERNEL);
557 	if (!loc) {
558 		retval = -ENOMEM;
559 		goto out_ret;
560 	}
561 
562 	/* Get the exec-header */
563 	loc->elf_ex = *((struct elfhdr *)bprm->buf);
564 
565 	retval = -ENOEXEC;
566 	/* First of all, some simple consistency checks */
567 	if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
568 		goto out;
569 
570 	if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
571 		goto out;
572 	if (!elf_check_arch(&loc->elf_ex))
573 		goto out;
574 	if (!bprm->file->f_op||!bprm->file->f_op->mmap)
575 		goto out;
576 
577 	/* Now read in all of the header information */
578 	if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
579 		goto out;
580 	if (loc->elf_ex.e_phnum < 1 ||
581 	 	loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
582 		goto out;
583 	size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
584 	retval = -ENOMEM;
585 	elf_phdata = kmalloc(size, GFP_KERNEL);
586 	if (!elf_phdata)
587 		goto out;
588 
589 	retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
590 			     (char *)elf_phdata, size);
591 	if (retval != size) {
592 		if (retval >= 0)
593 			retval = -EIO;
594 		goto out_free_ph;
595 	}
596 
597 	files = current->files;	/* Refcounted so ok */
598 	retval = unshare_files();
599 	if (retval < 0)
600 		goto out_free_ph;
601 	if (files == current->files) {
602 		put_files_struct(files);
603 		files = NULL;
604 	}
605 
606 	/* exec will make our files private anyway, but for the a.out
607 	   loader stuff we need to do it earlier */
608 	retval = get_unused_fd();
609 	if (retval < 0)
610 		goto out_free_fh;
611 	get_file(bprm->file);
612 	fd_install(elf_exec_fileno = retval, bprm->file);
613 
614 	elf_ppnt = elf_phdata;
615 	elf_bss = 0;
616 	elf_brk = 0;
617 
618 	start_code = ~0UL;
619 	end_code = 0;
620 	start_data = 0;
621 	end_data = 0;
622 
623 	for (i = 0; i < loc->elf_ex.e_phnum; i++) {
624 		if (elf_ppnt->p_type == PT_INTERP) {
625 			/* This is the program interpreter used for
626 			 * shared libraries - for now assume that this
627 			 * is an a.out format binary
628 			 */
629 			retval = -ENOEXEC;
630 			if (elf_ppnt->p_filesz > PATH_MAX ||
631 			    elf_ppnt->p_filesz < 2)
632 				goto out_free_file;
633 
634 			retval = -ENOMEM;
635 			elf_interpreter = kmalloc(elf_ppnt->p_filesz,
636 						  GFP_KERNEL);
637 			if (!elf_interpreter)
638 				goto out_free_file;
639 
640 			retval = kernel_read(bprm->file, elf_ppnt->p_offset,
641 					     elf_interpreter,
642 					     elf_ppnt->p_filesz);
643 			if (retval != elf_ppnt->p_filesz) {
644 				if (retval >= 0)
645 					retval = -EIO;
646 				goto out_free_interp;
647 			}
648 			/* make sure path is NUL-terminated */
649 			retval = -ENOEXEC;
650 			if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
651 				goto out_free_interp;
652 
653 			/* If the program interpreter is one of these two,
654 			 * then assume an iBCS2 image. Otherwise assume
655 			 * a native linux image.
656 			 */
657 			if (strcmp(elf_interpreter,"/usr/lib/libc.so.1") == 0 ||
658 			    strcmp(elf_interpreter,"/usr/lib/ld.so.1") == 0)
659 				ibcs2_interpreter = 1;
660 
661 			/*
662 			 * The early SET_PERSONALITY here is so that the lookup
663 			 * for the interpreter happens in the namespace of the
664 			 * to-be-execed image.  SET_PERSONALITY can select an
665 			 * alternate root.
666 			 *
667 			 * However, SET_PERSONALITY is NOT allowed to switch
668 			 * this task into the new image's memory mapping
669 			 * policy - that is, TASK_SIZE must still evaluate to
670 			 * that which is appropriate to the execing application.
671 			 * This is because exit_mmap() needs to have TASK_SIZE
672 			 * evaluate to the size of the old image.
673 			 *
674 			 * So if (say) a 64-bit application is execing a 32-bit
675 			 * application it is the architecture's responsibility
676 			 * to defer changing the value of TASK_SIZE until the
677 			 * switch really is going to happen - do this in
678 			 * flush_thread().	- akpm
679 			 */
680 			SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
681 
682 			interpreter = open_exec(elf_interpreter);
683 			retval = PTR_ERR(interpreter);
684 			if (IS_ERR(interpreter))
685 				goto out_free_interp;
686 			retval = kernel_read(interpreter, 0, bprm->buf,
687 					     BINPRM_BUF_SIZE);
688 			if (retval != BINPRM_BUF_SIZE) {
689 				if (retval >= 0)
690 					retval = -EIO;
691 				goto out_free_dentry;
692 			}
693 
694 			/* Get the exec headers */
695 			loc->interp_ex = *((struct exec *)bprm->buf);
696 			loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
697 			break;
698 		}
699 		elf_ppnt++;
700 	}
701 
702 	elf_ppnt = elf_phdata;
703 	for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
704 		if (elf_ppnt->p_type == PT_GNU_STACK) {
705 			if (elf_ppnt->p_flags & PF_X)
706 				executable_stack = EXSTACK_ENABLE_X;
707 			else
708 				executable_stack = EXSTACK_DISABLE_X;
709 			break;
710 		}
711 	have_pt_gnu_stack = (i < loc->elf_ex.e_phnum);
712 
713 	/* Some simple consistency checks for the interpreter */
714 	if (elf_interpreter) {
715 		interpreter_type = INTERPRETER_ELF | INTERPRETER_AOUT;
716 
717 		/* Now figure out which format our binary is */
718 		if ((N_MAGIC(loc->interp_ex) != OMAGIC) &&
719 		    (N_MAGIC(loc->interp_ex) != ZMAGIC) &&
720 		    (N_MAGIC(loc->interp_ex) != QMAGIC))
721 			interpreter_type = INTERPRETER_ELF;
722 
723 		if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
724 			interpreter_type &= ~INTERPRETER_ELF;
725 
726 		retval = -ELIBBAD;
727 		if (!interpreter_type)
728 			goto out_free_dentry;
729 
730 		/* Make sure only one type was selected */
731 		if ((interpreter_type & INTERPRETER_ELF) &&
732 		     interpreter_type != INTERPRETER_ELF) {
733 	     		// FIXME - ratelimit this before re-enabling
734 			// printk(KERN_WARNING "ELF: Ambiguous type, using ELF\n");
735 			interpreter_type = INTERPRETER_ELF;
736 		}
737 		/* Verify the interpreter has a valid arch */
738 		if ((interpreter_type == INTERPRETER_ELF) &&
739 		    !elf_check_arch(&loc->interp_elf_ex))
740 			goto out_free_dentry;
741 	} else {
742 		/* Executables without an interpreter also need a personality */
743 		SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
744 	}
745 
746 	/* OK, we are done with that, now set up the arg stuff,
747 	   and then start this sucker up */
748 	if ((!bprm->sh_bang) && (interpreter_type == INTERPRETER_AOUT)) {
749 		char *passed_p = passed_fileno;
750 		sprintf(passed_fileno, "%d", elf_exec_fileno);
751 
752 		if (elf_interpreter) {
753 			retval = copy_strings_kernel(1, &passed_p, bprm);
754 			if (retval)
755 				goto out_free_dentry;
756 			bprm->argc++;
757 		}
758 	}
759 
760 	/* Flush all traces of the currently running executable */
761 	retval = flush_old_exec(bprm);
762 	if (retval)
763 		goto out_free_dentry;
764 
765 	/* Discard our unneeded old files struct */
766 	if (files) {
767 		put_files_struct(files);
768 		files = NULL;
769 	}
770 
771 	/* OK, This is the point of no return */
772 	current->mm->start_data = 0;
773 	current->mm->end_data = 0;
774 	current->mm->end_code = 0;
775 	current->mm->mmap = NULL;
776 	current->flags &= ~PF_FORKNOEXEC;
777 	current->mm->def_flags = def_flags;
778 
779 	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
780 	   may depend on the personality.  */
781 	SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
782 	if (elf_read_implies_exec(loc->elf_ex, executable_stack))
783 		current->personality |= READ_IMPLIES_EXEC;
784 
785 	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
786 		current->flags |= PF_RANDOMIZE;
787 	arch_pick_mmap_layout(current->mm);
788 
789 	/* Do this so that we can load the interpreter, if need be.  We will
790 	   change some of these later */
791 	current->mm->free_area_cache = current->mm->mmap_base;
792 	current->mm->cached_hole_size = 0;
793 	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
794 				 executable_stack);
795 	if (retval < 0) {
796 		send_sig(SIGKILL, current, 0);
797 		goto out_free_dentry;
798 	}
799 
800 	current->mm->start_stack = bprm->p;
801 
802 	/* Now we do a little grungy work by mmapping the ELF image into
803 	   the correct location in memory.  At this point, we assume that
804 	   the image should be loaded at fixed address, not at a variable
805 	   address. */
806 	for(i = 0, elf_ppnt = elf_phdata;
807 	    i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
808 		int elf_prot = 0, elf_flags;
809 		unsigned long k, vaddr;
810 
811 		if (elf_ppnt->p_type != PT_LOAD)
812 			continue;
813 
814 		if (unlikely (elf_brk > elf_bss)) {
815 			unsigned long nbyte;
816 
817 			/* There was a PT_LOAD segment with p_memsz > p_filesz
818 			   before this one. Map anonymous pages, if needed,
819 			   and clear the area.  */
820 			retval = set_brk (elf_bss + load_bias,
821 					  elf_brk + load_bias);
822 			if (retval) {
823 				send_sig(SIGKILL, current, 0);
824 				goto out_free_dentry;
825 			}
826 			nbyte = ELF_PAGEOFFSET(elf_bss);
827 			if (nbyte) {
828 				nbyte = ELF_MIN_ALIGN - nbyte;
829 				if (nbyte > elf_brk - elf_bss)
830 					nbyte = elf_brk - elf_bss;
831 				if (clear_user((void __user *)elf_bss +
832 							load_bias, nbyte)) {
833 					/*
834 					 * This bss-zeroing can fail if the ELF
835 					 * file specifies odd protections. So
836 					 * we don't check the return value
837 					 */
838 				}
839 			}
840 		}
841 
842 		if (elf_ppnt->p_flags & PF_R)
843 			elf_prot |= PROT_READ;
844 		if (elf_ppnt->p_flags & PF_W)
845 			elf_prot |= PROT_WRITE;
846 		if (elf_ppnt->p_flags & PF_X)
847 			elf_prot |= PROT_EXEC;
848 
849 		elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
850 
851 		vaddr = elf_ppnt->p_vaddr;
852 		if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
853 			elf_flags |= MAP_FIXED;
854 		} else if (loc->elf_ex.e_type == ET_DYN) {
855 			/* Try and get dynamic programs out of the way of the
856 			 * default mmap base, as well as whatever program they
857 			 * might try to exec.  This is because the brk will
858 			 * follow the loader, and is not movable.  */
859 			load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
860 		}
861 
862 		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
863 				elf_prot, elf_flags);
864 		if (BAD_ADDR(error)) {
865 			send_sig(SIGKILL, current, 0);
866 			goto out_free_dentry;
867 		}
868 
869 		if (!load_addr_set) {
870 			load_addr_set = 1;
871 			load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
872 			if (loc->elf_ex.e_type == ET_DYN) {
873 				load_bias += error -
874 				             ELF_PAGESTART(load_bias + vaddr);
875 				load_addr += load_bias;
876 				reloc_func_desc = load_bias;
877 			}
878 		}
879 		k = elf_ppnt->p_vaddr;
880 		if (k < start_code)
881 			start_code = k;
882 		if (start_data < k)
883 			start_data = k;
884 
885 		/*
886 		 * Check to see if this segment's size will overflow the
887 		 * allowed task size. Note that p_filesz must always be
888 		 * <= p_memsz so it is only necessary to check p_memsz.
889 		 */
890 		if (k > TASK_SIZE || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
891 		    elf_ppnt->p_memsz > TASK_SIZE ||
892 		    TASK_SIZE - elf_ppnt->p_memsz < k) {
893 			/* set_brk can never work. Avoid overflows. */
894 			send_sig(SIGKILL, current, 0);
895 			goto out_free_dentry;
896 		}
897 
898 		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
899 
900 		if (k > elf_bss)
901 			elf_bss = k;
902 		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
903 			end_code = k;
904 		if (end_data < k)
905 			end_data = k;
906 		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
907 		if (k > elf_brk)
908 			elf_brk = k;
909 	}
910 
911 	loc->elf_ex.e_entry += load_bias;
912 	elf_bss += load_bias;
913 	elf_brk += load_bias;
914 	start_code += load_bias;
915 	end_code += load_bias;
916 	start_data += load_bias;
917 	end_data += load_bias;
918 
919 	/* Calling set_brk effectively mmaps the pages that we need
920 	 * for the bss and break sections.  We must do this before
921 	 * mapping in the interpreter, to make sure it doesn't wind
922 	 * up getting placed where the bss needs to go.
923 	 */
924 	retval = set_brk(elf_bss, elf_brk);
925 	if (retval) {
926 		send_sig(SIGKILL, current, 0);
927 		goto out_free_dentry;
928 	}
929 	if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
930 		send_sig(SIGSEGV, current, 0);
931 		retval = -EFAULT; /* Nobody gets to see this, but.. */
932 		goto out_free_dentry;
933 	}
934 
935 	if (elf_interpreter) {
936 		if (interpreter_type == INTERPRETER_AOUT)
937 			elf_entry = load_aout_interp(&loc->interp_ex,
938 						     interpreter);
939 		else
940 			elf_entry = load_elf_interp(&loc->interp_elf_ex,
941 						    interpreter,
942 						    &interp_load_addr);
943 		if (BAD_ADDR(elf_entry)) {
944 			printk(KERN_ERR "Unable to load interpreter %.128s\n",
945 				elf_interpreter);
946 			force_sig(SIGSEGV, current);
947 			retval = -ENOEXEC; /* Nobody gets to see this, but.. */
948 			goto out_free_dentry;
949 		}
950 		reloc_func_desc = interp_load_addr;
951 
952 		allow_write_access(interpreter);
953 		fput(interpreter);
954 		kfree(elf_interpreter);
955 	} else {
956 		elf_entry = loc->elf_ex.e_entry;
957 		if (BAD_ADDR(elf_entry)) {
958 			send_sig(SIGSEGV, current, 0);
959 			retval = -ENOEXEC; /* Nobody gets to see this, but.. */
960 			goto out_free_dentry;
961 		}
962 	}
963 
964 	kfree(elf_phdata);
965 
966 	if (interpreter_type != INTERPRETER_AOUT)
967 		sys_close(elf_exec_fileno);
968 
969 	set_binfmt(&elf_format);
970 
971 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
972 	retval = arch_setup_additional_pages(bprm, executable_stack);
973 	if (retval < 0) {
974 		send_sig(SIGKILL, current, 0);
975 		goto out;
976 	}
977 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
978 
979 	compute_creds(bprm);
980 	current->flags &= ~PF_FORKNOEXEC;
981 	create_elf_tables(bprm, &loc->elf_ex,
982 			  (interpreter_type == INTERPRETER_AOUT),
983 			  load_addr, interp_load_addr);
984 	/* N.B. passed_fileno might not be initialized? */
985 	if (interpreter_type == INTERPRETER_AOUT)
986 		current->mm->arg_start += strlen(passed_fileno) + 1;
987 	current->mm->end_code = end_code;
988 	current->mm->start_code = start_code;
989 	current->mm->start_data = start_data;
990 	current->mm->end_data = end_data;
991 	current->mm->start_stack = bprm->p;
992 
993 	if (current->personality & MMAP_PAGE_ZERO) {
994 		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
995 		   and some applications "depend" upon this behavior.
996 		   Since we do not have the power to recompile these, we
997 		   emulate the SVr4 behavior. Sigh. */
998 		down_write(&current->mm->mmap_sem);
999 		error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
1000 				MAP_FIXED | MAP_PRIVATE, 0);
1001 		up_write(&current->mm->mmap_sem);
1002 	}
1003 
1004 #ifdef ELF_PLAT_INIT
1005 	/*
1006 	 * The ABI may specify that certain registers be set up in special
1007 	 * ways (on i386 %edx is the address of a DT_FINI function, for
1008 	 * example.  In addition, it may also specify (eg, PowerPC64 ELF)
1009 	 * example).  In addition, it may also specify (e.g. PowerPC64 ELF)
1010 	 * for the startup routine, rather than the address of the startup
1011 	 * routine itself.  This macro performs whatever initialization of
1012 	 * the regs structure is required as well as any relocations to the
1013 	 * function descriptor entries when executing dynamically linked apps.
1014 	 */
1015 	ELF_PLAT_INIT(regs, reloc_func_desc);
1016 #endif
1017 
1018 	start_thread(regs, elf_entry, bprm->p);
1019 	if (unlikely(current->ptrace & PT_PTRACED)) {
1020 		if (current->ptrace & PT_TRACE_EXEC)
1021 			ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
1022 		else
1023 			send_sig(SIGTRAP, current, 0);
1024 	}
1025 	retval = 0;
1026 out:
1027 	kfree(loc);
1028 out_ret:
1029 	return retval;
1030 
1031 	/* error cleanup */
1032 out_free_dentry:
1033 	allow_write_access(interpreter);
1034 	if (interpreter)
1035 		fput(interpreter);
1036 out_free_interp:
1037 	kfree(elf_interpreter);
1038 out_free_file:
1039 	sys_close(elf_exec_fileno);
1040 out_free_fh:
1041 	if (files) {
1042 		put_files_struct(current->files);
1043 		current->files = files;
1044 	}
1045 out_free_ph:
1046 	kfree(elf_phdata);
1047 	goto out;
1048 }
1049 
1050 /* This is really simpleminded and specialized - we are loading an
1051    a.out library that is given an ELF header. */
1052 static int load_elf_library(struct file *file)
1053 {
1054 	struct elf_phdr *elf_phdata;
1055 	struct elf_phdr *eppnt;
1056 	unsigned long elf_bss, bss, len;
1057 	int retval, error, i, j;
1058 	struct elfhdr elf_ex;
1059 
1060 	error = -ENOEXEC;
1061 	retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1062 	if (retval != sizeof(elf_ex))
1063 		goto out;
1064 
1065 	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1066 		goto out;
1067 
1068 	/* First of all, some simple consistency checks */
1069 	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1070 	    !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
1071 		goto out;
1072 
1073 	/* Now read in all of the header information */
1074 
1075 	j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1076 	/* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1077 
1078 	error = -ENOMEM;
1079 	elf_phdata = kmalloc(j, GFP_KERNEL);
1080 	if (!elf_phdata)
1081 		goto out;
1082 
1083 	eppnt = elf_phdata;
1084 	error = -ENOEXEC;
1085 	retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1086 	if (retval != j)
1087 		goto out_free_ph;
1088 
1089 	for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1090 		if ((eppnt + i)->p_type == PT_LOAD)
1091 			j++;
1092 	if (j != 1)
1093 		goto out_free_ph;
1094 
1095 	while (eppnt->p_type != PT_LOAD)
1096 		eppnt++;
1097 
1098 	/* Now use mmap to map the library into memory. */
1099 	down_write(&current->mm->mmap_sem);
1100 	error = do_mmap(file,
1101 			ELF_PAGESTART(eppnt->p_vaddr),
1102 			(eppnt->p_filesz +
1103 			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
1104 			PROT_READ | PROT_WRITE | PROT_EXEC,
1105 			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1106 			(eppnt->p_offset -
1107 			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
1108 	up_write(&current->mm->mmap_sem);
1109 	if (error != ELF_PAGESTART(eppnt->p_vaddr))
1110 		goto out_free_ph;
1111 
1112 	elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1113 	if (padzero(elf_bss)) {
1114 		error = -EFAULT;
1115 		goto out_free_ph;
1116 	}
1117 
1118 	len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1119 			    ELF_MIN_ALIGN - 1);
1120 	bss = eppnt->p_memsz + eppnt->p_vaddr;
1121 	if (bss > len) {
1122 		down_write(&current->mm->mmap_sem);
1123 		do_brk(len, bss - len);
1124 		up_write(&current->mm->mmap_sem);
1125 	}
1126 	error = 0;
1127 
1128 out_free_ph:
1129 	kfree(elf_phdata);
1130 out:
1131 	return error;
1132 }
1133 
1134 /*
1135  * Note that some platforms still use traditional core dumps and not
1136  * the ELF core dump.  Each platform can select it as appropriate.
1137  */
1138 #if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
1139 
1140 /*
1141  * ELF core dumper
1142  *
1143  * Modelled on fs/exec.c:aout_core_dump()
1144  * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1145  */
1146 /*
1147  * These are the only things you should do on a core-file: use only these
1148  * functions to write out all the necessary info.
1149  */
1150 static int dump_write(struct file *file, const void *addr, int nr)
1151 {
1152 	return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
1153 }
1154 
1155 static int dump_seek(struct file *file, loff_t off)
1156 {
1157 	if (file->f_op->llseek) {
1158 		if (file->f_op->llseek(file, off, 0) != off)
1159 			return 0;
1160 	} else
1161 		file->f_pos = off;
1162 	return 1;
1163 }
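/*
 * Seeking instead of writing zeroes lets skipped regions of the dump
 * become holes, so the core file can be sparse on filesystems that
 * support it; when the file can't seek we just advance f_pos.
 */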
1164 
1165 /*
1166  * Decide whether a segment is worth dumping; default is yes to be
1167  * sure (missing info is worse than too much, etc.).
1168  * Personally I'd include everything, and use the coredump limit...
1169  *
1170  * I think we should skip something. But I am not sure how. H.J.
1171  */
1172 static int maydump(struct vm_area_struct *vma)
1173 {
1174 	/* Do not dump I/O mapped devices or special mappings */
1175 	if (vma->vm_flags & (VM_IO | VM_RESERVED))
1176 		return 0;
1177 
1178 	/* Dump shared memory only if mapped from an anonymous file. */
1179 	if (vma->vm_flags & VM_SHARED)
1180 		return vma->vm_file->f_dentry->d_inode->i_nlink == 0;
1181 
1182 	/* If it hasn't been written to, don't write it out */
1183 	if (!vma->anon_vma)
1184 		return 0;
1185 
1186 	return 1;
1187 }
1188 
1189 #define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y))
1190 
1191 /* An ELF note in memory */
1192 struct memelfnote
1193 {
1194 	const char *name;
1195 	int type;
1196 	unsigned int datasz;
1197 	void *data;
1198 };
1199 
1200 static int notesize(struct memelfnote *en)
1201 {
1202 	int sz;
1203 
1204 	sz = sizeof(struct elf_note);
1205 	sz += roundup(strlen(en->name) + 1, 4);
1206 	sz += roundup(en->datasz, 4);
1207 
1208 	return sz;
1209 }
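/*
 * For instance, assuming the usual three-word (12-byte) struct
 * elf_note, a "CORE"/NT_PRSTATUS note takes
 * 12 + roundup(5, 4) + roundup(sizeof(struct elf_prstatus), 4) bytes:
 * a 20-byte header and name area plus the padded payload.
 */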
1210 
1211 #define DUMP_WRITE(addr, nr)	\
1212 	do { if (!dump_write(file, (addr), (nr))) return 0; } while(0)
1213 #define DUMP_SEEK(off)	\
1214 	do { if (!dump_seek(file, (off))) return 0; } while(0)
1215 
1216 static int writenote(struct memelfnote *men, struct file *file)
1217 {
1218 	struct elf_note en;
1219 
1220 	en.n_namesz = strlen(men->name) + 1;
1221 	en.n_descsz = men->datasz;
1222 	en.n_type = men->type;
1223 
1224 	DUMP_WRITE(&en, sizeof(en));
1225 	DUMP_WRITE(men->name, en.n_namesz);
1226 	/* XXX - cast from long long to long to avoid need for libgcc.a */
1227 	DUMP_SEEK(roundup((unsigned long)file->f_pos, 4));	/* XXX */
1228 	DUMP_WRITE(men->data, men->datasz);
1229 	DUMP_SEEK(roundup((unsigned long)file->f_pos, 4));	/* XXX */
1230 
1231 	return 1;
1232 }
1233 #undef DUMP_WRITE
1234 #undef DUMP_SEEK
1235 
1236 #define DUMP_WRITE(addr, nr)	\
1237 	if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
1238 		goto end_coredump;
1239 #define DUMP_SEEK(off)	\
1240 	if (!dump_seek(file, (off))) \
1241 		goto end_coredump;
1242 
1243 static void fill_elf_header(struct elfhdr *elf, int segs)
1244 {
1245 	memcpy(elf->e_ident, ELFMAG, SELFMAG);
1246 	elf->e_ident[EI_CLASS] = ELF_CLASS;
1247 	elf->e_ident[EI_DATA] = ELF_DATA;
1248 	elf->e_ident[EI_VERSION] = EV_CURRENT;
1249 	elf->e_ident[EI_OSABI] = ELF_OSABI;
1250 	memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
1251 
1252 	elf->e_type = ET_CORE;
1253 	elf->e_machine = ELF_ARCH;
1254 	elf->e_version = EV_CURRENT;
1255 	elf->e_entry = 0;
1256 	elf->e_phoff = sizeof(struct elfhdr);
1257 	elf->e_shoff = 0;
1258 	elf->e_flags = ELF_CORE_EFLAGS;
1259 	elf->e_ehsize = sizeof(struct elfhdr);
1260 	elf->e_phentsize = sizeof(struct elf_phdr);
1261 	elf->e_phnum = segs;
1262 	elf->e_shentsize = 0;
1263 	elf->e_shnum = 0;
1264 	elf->e_shstrndx = 0;
1265 	return;
1266 }
1267 
1268 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, off_t offset)
1269 {
1270 	phdr->p_type = PT_NOTE;
1271 	phdr->p_offset = offset;
1272 	phdr->p_vaddr = 0;
1273 	phdr->p_paddr = 0;
1274 	phdr->p_filesz = sz;
1275 	phdr->p_memsz = 0;
1276 	phdr->p_flags = 0;
1277 	phdr->p_align = 0;
1278 	return;
1279 }
1280 
1281 static void fill_note(struct memelfnote *note, const char *name, int type,
1282 		unsigned int sz, void *data)
1283 {
1284 	note->name = name;
1285 	note->type = type;
1286 	note->datasz = sz;
1287 	note->data = data;
1288 	return;
1289 }
1290 
1291 /*
1292  * fill up all the fields in prstatus from the given task struct, except
1293  * registers which need to be filled up separately.
1294  */
1295 static void fill_prstatus(struct elf_prstatus *prstatus,
1296 		struct task_struct *p, long signr)
1297 {
1298 	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1299 	prstatus->pr_sigpend = p->pending.signal.sig[0];
1300 	prstatus->pr_sighold = p->blocked.sig[0];
1301 	prstatus->pr_pid = p->pid;
1302 	prstatus->pr_ppid = p->parent->pid;
1303 	prstatus->pr_pgrp = process_group(p);
1304 	prstatus->pr_sid = p->signal->session;
1305 	if (thread_group_leader(p)) {
1306 		/*
1307 		 * This is the record for the group leader.  Add in the
1308 		 * cumulative times of previous dead threads.  This total
1309 		 * won't include the time of each live thread whose state
1310 		 * is included in the core dump.  The final total reported
1311 		 * to our parent process when it calls wait4 will include
1312 		 * those sums as well as the little bit more time it takes
1313 		 * this and each other thread to finish dying after the
1314 		 * core dump synchronization phase.
1315 		 */
1316 		cputime_to_timeval(cputime_add(p->utime, p->signal->utime),
1317 				   &prstatus->pr_utime);
1318 		cputime_to_timeval(cputime_add(p->stime, p->signal->stime),
1319 				   &prstatus->pr_stime);
1320 	} else {
1321 		cputime_to_timeval(p->utime, &prstatus->pr_utime);
1322 		cputime_to_timeval(p->stime, &prstatus->pr_stime);
1323 	}
1324 	cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1325 	cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1326 }
1327 
1328 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1329 		       struct mm_struct *mm)
1330 {
1331 	unsigned int i, len;
1332 
1333 	/* first copy the parameters from user space */
1334 	memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1335 
1336 	len = mm->arg_end - mm->arg_start;
1337 	if (len >= ELF_PRARGSZ)
1338 		len = ELF_PRARGSZ-1;
1339 	if (copy_from_user(&psinfo->pr_psargs,
1340 		           (const char __user *)mm->arg_start, len))
1341 		return -EFAULT;
1342 	for(i = 0; i < len; i++)
1343 		if (psinfo->pr_psargs[i] == 0)
1344 			psinfo->pr_psargs[i] = ' ';
1345 	psinfo->pr_psargs[len] = 0;
1346 
1347 	psinfo->pr_pid = p->pid;
1348 	psinfo->pr_ppid = p->parent->pid;
1349 	psinfo->pr_pgrp = process_group(p);
1350 	psinfo->pr_sid = p->signal->session;
1351 
1352 	i = p->state ? ffz(~p->state) + 1 : 0;
1353 	psinfo->pr_state = i;
1354 	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1355 	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1356 	psinfo->pr_nice = task_nice(p);
1357 	psinfo->pr_flag = p->flags;
1358 	SET_UID(psinfo->pr_uid, p->uid);
1359 	SET_GID(psinfo->pr_gid, p->gid);
1360 	strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1361 
1362 	return 0;
1363 }
1364 
1365 /* Here is the structure in which status of each thread is captured. */
1366 struct elf_thread_status
1367 {
1368 	struct list_head list;
1369 	struct elf_prstatus prstatus;	/* NT_PRSTATUS */
1370 	elf_fpregset_t fpu;		/* NT_PRFPREG */
1371 	struct task_struct *thread;
1372 #ifdef ELF_CORE_COPY_XFPREGS
1373 	elf_fpxregset_t xfpu;		/* NT_PRXFPREG */
1374 #endif
1375 	struct memelfnote notes[3];
1376 	int num_notes;
1377 };
1378 
1379 /*
1380  * In order to add the specific thread information for the elf file format,
1381  * we need to keep a linked list of every thread's pr_status and then create
1382  * a single section for them in the final core file.
1383  */
1384 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1385 {
1386 	int sz = 0;
1387 	struct task_struct *p = t->thread;
1388 	t->num_notes = 0;
1389 
1390 	fill_prstatus(&t->prstatus, p, signr);
1391 	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1392 
1393 	fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1394 		  &(t->prstatus));
1395 	t->num_notes++;
1396 	sz += notesize(&t->notes[0]);
1397 
1398 	if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1399 								&t->fpu))) {
1400 		fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1401 			  &(t->fpu));
1402 		t->num_notes++;
1403 		sz += notesize(&t->notes[1]);
1404 	}
1405 
1406 #ifdef ELF_CORE_COPY_XFPREGS
1407 	if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1408 		fill_note(&t->notes[2], "LINUX", NT_PRXFPREG, sizeof(t->xfpu),
1409 			  &t->xfpu);
1410 		t->num_notes++;
1411 		sz += notesize(&t->notes[2]);
1412 	}
1413 #endif
1414 	return sz;
1415 }
1416 
1417 /*
1418  * Actual dumper
1419  *
1420  * This is a two-pass process; first we find the offsets of the bits,
1421  * and then they are actually written out.  If we run out of core limit
1422  * we just truncate.
1423  */
1424 static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
1425 {
1426 #define	NUM_NOTES	6
1427 	int has_dumped = 0;
1428 	mm_segment_t fs;
1429 	int segs;
1430 	size_t size = 0;
1431 	int i;
1432 	struct vm_area_struct *vma;
1433 	struct elfhdr *elf = NULL;
1434 	off_t offset = 0, dataoff;
1435 	unsigned long limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
1436 	int numnote;
1437 	struct memelfnote *notes = NULL;
1438 	struct elf_prstatus *prstatus = NULL;	/* NT_PRSTATUS */
1439 	struct elf_prpsinfo *psinfo = NULL;	/* NT_PRPSINFO */
1440  	struct task_struct *g, *p;
1441  	LIST_HEAD(thread_list);
1442  	struct list_head *t;
1443 	elf_fpregset_t *fpu = NULL;
1444 #ifdef ELF_CORE_COPY_XFPREGS
1445 	elf_fpxregset_t *xfpu = NULL;
1446 #endif
1447 	int thread_status_size = 0;
1448 	elf_addr_t *auxv;
1449 
1450 	/*
1451 	 * We no longer stop all VM operations.
1452 	 *
1453 	 * This is because those processes that could possibly change map_count
1454 	 * or the mmap / vma pages are now blocked in do_exit on current
1455 	 * finishing this core dump.
1456 	 *
1457 	 * Only ptrace can touch these memory addresses, but it doesn't change
1458 	 * the map_count or the pages allocated. So no possibility of crashing
1459 	 * exists while dumping the mm->vm_next areas to the core file.
1460 	 */
1461 
1462 	/* alloc memory for large data structures: too large to be on stack */
1463 	elf = kmalloc(sizeof(*elf), GFP_KERNEL);
1464 	if (!elf)
1465 		goto cleanup;
1466 	prstatus = kmalloc(sizeof(*prstatus), GFP_KERNEL);
1467 	if (!prstatus)
1468 		goto cleanup;
1469 	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1470 	if (!psinfo)
1471 		goto cleanup;
1472 	notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote), GFP_KERNEL);
1473 	if (!notes)
1474 		goto cleanup;
1475 	fpu = kmalloc(sizeof(*fpu), GFP_KERNEL);
1476 	if (!fpu)
1477 		goto cleanup;
1478 #ifdef ELF_CORE_COPY_XFPREGS
1479 	xfpu = kmalloc(sizeof(*xfpu), GFP_KERNEL);
1480 	if (!xfpu)
1481 		goto cleanup;
1482 #endif
1483 
1484 	if (signr) {
1485 		struct elf_thread_status *tmp;
1486 		read_lock(&tasklist_lock);
1487 		do_each_thread(g,p)
1488 			if (current->mm == p->mm && current != p) {
1489 				tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC);
1490 				if (!tmp) {
1491 					read_unlock(&tasklist_lock);
1492 					goto cleanup;
1493 				}
1494 				INIT_LIST_HEAD(&tmp->list);
1495 				tmp->thread = p;
1496 				list_add(&tmp->list, &thread_list);
1497 			}
1498 		while_each_thread(g,p);
1499 		read_unlock(&tasklist_lock);
1500 		list_for_each(t, &thread_list) {
1501 			struct elf_thread_status *tmp;
1502 			int sz;
1503 
1504 			tmp = list_entry(t, struct elf_thread_status, list);
1505 			sz = elf_dump_thread_status(signr, tmp);
1506 			thread_status_size += sz;
1507 		}
1508 	}
1509 	/* now collect the dump for the current task */
1510 	memset(prstatus, 0, sizeof(*prstatus));
1511 	fill_prstatus(prstatus, current, signr);
1512 	elf_core_copy_regs(&prstatus->pr_reg, regs);
1513 
1514 	segs = current->mm->map_count;
1515 #ifdef ELF_CORE_EXTRA_PHDRS
1516 	segs += ELF_CORE_EXTRA_PHDRS;
1517 #endif
1518 
1519 	/* Set up header */
1520 	fill_elf_header(elf, segs + 1);	/* including notes section */
1521 
1522 	has_dumped = 1;
1523 	current->flags |= PF_DUMPCORE;
1524 
1525 	/*
1526 	 * Set up the notes in similar form to SVR4 core dumps made
1527 	 * with info from their /proc.
1528 	 */
1529 
1530 	fill_note(notes + 0, "CORE", NT_PRSTATUS, sizeof(*prstatus), prstatus);
1531 	fill_psinfo(psinfo, current->group_leader, current->mm);
1532 	fill_note(notes + 1, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1533 
1534 	numnote = 2;
1535 
1536 	auxv = (elf_addr_t *)current->mm->saved_auxv;
1537 
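	/*
	 * Walk saved_auxv two words (id, value) at a time; the loop stops
	 * after stepping past the AT_NULL terminator, so the terminator
	 * pair itself is included in the NT_AUXV note below.
	 */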
1538 	i = 0;
1539 	do
1540 		i += 2;
1541 	while (auxv[i - 2] != AT_NULL);
1542 	fill_note(&notes[numnote++], "CORE", NT_AUXV,
1543 		  i * sizeof(elf_addr_t), auxv);
1544 
1545   	/* Try to dump the FPU. */
1546 	if ((prstatus->pr_fpvalid =
1547 	     elf_core_copy_task_fpregs(current, regs, fpu)))
1548 		fill_note(notes + numnote++,
1549 			  "CORE", NT_PRFPREG, sizeof(*fpu), fpu);
1550 #ifdef ELF_CORE_COPY_XFPREGS
1551 	if (elf_core_copy_task_xfpregs(current, xfpu))
1552 		fill_note(notes + numnote++,
1553 			  "LINUX", NT_PRXFPREG, sizeof(*xfpu), xfpu);
1554 #endif
1555 
1556 	fs = get_fs();
1557 	set_fs(KERNEL_DS);
1558 
1559 	DUMP_WRITE(elf, sizeof(*elf));
1560 	offset += sizeof(*elf);				/* Elf header */
1561 	offset += (segs+1) * sizeof(struct elf_phdr);	/* Program headers */
1562 
1563 	/* Write notes phdr entry */
1564 	{
1565 		struct elf_phdr phdr;
1566 		int sz = 0;
1567 
1568 		for (i = 0; i < numnote; i++)
1569 			sz += notesize(notes + i);
1570 
1571 		sz += thread_status_size;
1572 
1573 		fill_elf_note_phdr(&phdr, sz, offset);
1574 		offset += sz;
1575 		DUMP_WRITE(&phdr, sizeof(phdr));
1576 	}
1577 
1578 	/* Page-align dumped data */
1579 	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
1580 
1581 	/* Write program headers for segments dump */
1582 	for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1583 		struct elf_phdr phdr;
1584 		size_t sz;
1585 
1586 		sz = vma->vm_end - vma->vm_start;
1587 
1588 		phdr.p_type = PT_LOAD;
1589 		phdr.p_offset = offset;
1590 		phdr.p_vaddr = vma->vm_start;
1591 		phdr.p_paddr = 0;
1592 		phdr.p_filesz = maydump(vma) ? sz : 0;
1593 		phdr.p_memsz = sz;
1594 		offset += phdr.p_filesz;
1595 		phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
1596 		if (vma->vm_flags & VM_WRITE)
1597 			phdr.p_flags |= PF_W;
1598 		if (vma->vm_flags & VM_EXEC)
1599 			phdr.p_flags |= PF_X;
1600 		phdr.p_align = ELF_EXEC_PAGESIZE;
1601 
1602 		DUMP_WRITE(&phdr, sizeof(phdr));
1603 	}
1604 
1605 #ifdef ELF_CORE_WRITE_EXTRA_PHDRS
1606 	ELF_CORE_WRITE_EXTRA_PHDRS;
1607 #endif
1608 
1609  	/* write out the notes section */
1610 	for (i = 0; i < numnote; i++)
1611 		if (!writenote(notes + i, file))
1612 			goto end_coredump;
1613 
1614 	/* write out the thread status notes section */
1615 	list_for_each(t, &thread_list) {
1616 		struct elf_thread_status *tmp =
1617 				list_entry(t, struct elf_thread_status, list);
1618 
1619 		for (i = 0; i < tmp->num_notes; i++)
1620 			if (!writenote(&tmp->notes[i], file))
1621 				goto end_coredump;
1622 	}
1623 
1624 	DUMP_SEEK(dataoff);
1625 
1626 	for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1627 		unsigned long addr;
1628 
1629 		if (!maydump(vma))
1630 			continue;
1631 
1632 		for (addr = vma->vm_start;
1633 		     addr < vma->vm_end;
1634 		     addr += PAGE_SIZE) {
1635 			struct page *page;
1636 			struct vm_area_struct *vma;
1637 
1638 			if (get_user_pages(current, current->mm, addr, 1, 0, 1,
1639 						&page, &vma) <= 0) {
1640 				DUMP_SEEK(file->f_pos + PAGE_SIZE);
1641 			} else {
1642 				if (page == ZERO_PAGE(addr)) {
1643 					DUMP_SEEK(file->f_pos + PAGE_SIZE);
1644 				} else {
1645 					void *kaddr;
1646 					flush_cache_page(vma, addr,
1647 							 page_to_pfn(page));
1648 					kaddr = kmap(page);
1649 					if ((size += PAGE_SIZE) > limit ||
1650 					    !dump_write(file, kaddr,
1651 					    PAGE_SIZE)) {
1652 						kunmap(page);
1653 						page_cache_release(page);
1654 						goto end_coredump;
1655 					}
1656 					kunmap(page);
1657 				}
1658 				page_cache_release(page);
1659 			}
1660 		}
1661 	}
1662 
1663 #ifdef ELF_CORE_WRITE_EXTRA_DATA
1664 	ELF_CORE_WRITE_EXTRA_DATA;
1665 #endif
1666 
1667 	if ((off_t)file->f_pos != offset) {
1668 		/* Sanity check */
1669 		printk(KERN_WARNING
1670 		       "elf_core_dump: file->f_pos (%ld) != offset (%ld)\n",
1671 		       (off_t)file->f_pos, offset);
1672 	}
1673 
1674 end_coredump:
1675 	set_fs(fs);
1676 
1677 cleanup:
1678 	while (!list_empty(&thread_list)) {
1679 		struct list_head *tmp = thread_list.next;
1680 		list_del(tmp);
1681 		kfree(list_entry(tmp, struct elf_thread_status, list));
1682 	}
1683 
1684 	kfree(elf);
1685 	kfree(prstatus);
1686 	kfree(psinfo);
1687 	kfree(notes);
1688 	kfree(fpu);
1689 #ifdef ELF_CORE_COPY_XFPREGS
1690 	kfree(xfpu);
1691 #endif
1692 	return has_dumped;
1693 #undef NUM_NOTES
1694 }
1695 
1696 #endif		/* USE_ELF_CORE_DUMP */
1697 
1698 static int __init init_elf_binfmt(void)
1699 {
1700 	return register_binfmt(&elf_format);
1701 }
1702 
1703 static void __exit exit_elf_binfmt(void)
1704 {
1705 	/* Remove the ELF loader. */
1706 	unregister_binfmt(&elf_format);
1707 }
1708 
1709 core_initcall(init_elf_binfmt);
1710 module_exit(exit_elf_binfmt);
1711 MODULE_LICENSE("GPL");
1712