xref: /freebsd/sys/kern/imgact_elf.c (revision 4cf49a43559ed9fdad601bdcccd2c55963008675)
1 /*-
2  * Copyright (c) 1995-1996 S�ren Schmidt
3  * Copyright (c) 1996 Peter Wemm
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer
11  *    in this position and unchanged.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. The name of the author may not be used to endorse or promote products
16  *    derived from this software withough specific prior written permission
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  *
29  * $FreeBSD$
30  */
31 
32 #include "opt_rlimit.h"
33 
34 #include <sys/param.h>
35 #include <sys/exec.h>
36 #include <sys/fcntl.h>
37 #include <sys/imgact.h>
38 #include <sys/imgact_elf.h>
39 #include <sys/kernel.h>
40 #include <sys/malloc.h>
41 #include <sys/mman.h>
42 #include <sys/namei.h>
43 #include <sys/pioctl.h>
44 #include <sys/proc.h>
45 #include <sys/procfs.h>
46 #include <sys/resourcevar.h>
47 #include <sys/signalvar.h>
48 #include <sys/stat.h>
49 #include <sys/syscall.h>
50 #include <sys/sysctl.h>
51 #include <sys/sysent.h>
52 #include <sys/systm.h>
53 #include <sys/vnode.h>
54 
55 #include <vm/vm.h>
56 #include <vm/vm_kern.h>
57 #include <vm/vm_param.h>
58 #include <vm/pmap.h>
59 #include <sys/lock.h>
60 #include <vm/vm_map.h>
61 #include <vm/vm_object.h>
62 #include <vm/vm_prot.h>
63 #include <vm/vm_extern.h>
64 
65 #include <machine/elf.h>
66 #include <machine/md_var.h>
67 
68 __ElfType(Brandinfo);
69 __ElfType(Auxargs);
70 
71 static int elf_check_header __P((const Elf_Ehdr *hdr));
72 static int elf_freebsd_fixup __P((long **stack_base,
73     struct image_params *imgp));
74 static int elf_load_file __P((struct proc *p, const char *file, u_long *addr,
75     u_long *entry));
76 static int elf_load_section __P((struct proc *p,
77     struct vmspace *vmspace, struct vnode *vp,
78     vm_offset_t offset, caddr_t vmaddr, size_t memsz, size_t filsz,
79     vm_prot_t prot));
80 static int exec_elf_imgact __P((struct image_params *imgp));
81 
82 static int elf_trace = 0;
83 SYSCTL_INT(_debug, OID_AUTO, elf_trace, CTLFLAG_RW, &elf_trace, 0, "");
84 
85 /*
86  * XXX Maximum length of an ELF brand (sysctl wants a statically-allocated
87  * buffer).
88  */
89 #define	MAXBRANDLEN	16
90 
91 static struct sysentvec elf_freebsd_sysvec = {
92         SYS_MAXSYSCALL,
93         sysent,
94         0,
95         0,
96         0,
97         0,
98         0,
99         0,
100         elf_freebsd_fixup,
101         sendsig,
102         sigcode,
103         &szsigcode,
104         0,
105 	"FreeBSD ELF",
106 	elf_coredump
107 };
108 
109 static Elf_Brandinfo freebsd_brand_info = {
110 						"FreeBSD",
111 						"",
112 						"/usr/libexec/ld-elf.so.1",
113 						&elf_freebsd_sysvec
114 					  };
115 static Elf_Brandinfo *elf_brand_list[MAX_BRANDS] = {
116 							&freebsd_brand_info,
117 							NULL, NULL, NULL,
118 							NULL, NULL, NULL, NULL
119 						    };
120 
121 int
122 elf_insert_brand_entry(Elf_Brandinfo *entry)
123 {
124 	int i;
125 
126 	for (i=1; i<MAX_BRANDS; i++) {
127 		if (elf_brand_list[i] == NULL) {
128 			elf_brand_list[i] = entry;
129 			break;
130 		}
131 	}
132 	if (i == MAX_BRANDS)
133 		return -1;
134 	return 0;
135 }
136 
137 int
138 elf_remove_brand_entry(Elf_Brandinfo *entry)
139 {
140 	int i;
141 
142 	for (i=1; i<MAX_BRANDS; i++) {
143 		if (elf_brand_list[i] == entry) {
144 			elf_brand_list[i] = NULL;
145 			break;
146 		}
147 	}
148 	if (i == MAX_BRANDS)
149 		return -1;
150 	return 0;
151 }
152 
153 int
154 elf_brand_inuse(Elf_Brandinfo *entry)
155 {
156 	struct proc *p;
157 
158 	for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
159 		if (p->p_sysent == entry->sysvec)
160 			return TRUE;
161 	}
162 
163 	return FALSE;
164 }
165 
166 static int
167 elf_check_header(const Elf_Ehdr *hdr)
168 {
169 	if (!IS_ELF(*hdr) ||
170 	    hdr->e_ident[EI_CLASS] != ELF_TARG_CLASS ||
171 	    hdr->e_ident[EI_DATA] != ELF_TARG_DATA ||
172 	    hdr->e_ident[EI_VERSION] != EV_CURRENT)
173 		return ENOEXEC;
174 
175 	if (!ELF_MACHINE_OK(hdr->e_machine))
176 		return ENOEXEC;
177 
178 	if (hdr->e_version != ELF_TARG_VER)
179 		return ENOEXEC;
180 
181 	return 0;
182 }
183 
184 static int
185 elf_load_section(struct proc *p, struct vmspace *vmspace, struct vnode *vp, vm_offset_t offset, caddr_t vmaddr, size_t memsz, size_t filsz, vm_prot_t prot)
186 {
187 	size_t map_len;
188 	vm_offset_t map_addr;
189 	int error, rv;
190 	size_t copy_len;
191 	vm_object_t object;
192 	vm_offset_t file_addr;
193 	vm_offset_t data_buf = 0;
194 
195 	object = vp->v_object;
196 	error = 0;
197 
198 	map_addr = trunc_page((vm_offset_t)vmaddr);
199 	file_addr = trunc_page(offset);
200 
201 	/*
202 	 * We have two choices.  We can either clear the data in the last page
203 	 * of an oversized mapping, or we can start the anon mapping a page
204 	 * early and copy the initialized data into that first page.  We
205 	 * choose the second..
206 	 */
207 	if (memsz > filsz)
208 		map_len = trunc_page(offset+filsz) - file_addr;
209 	else
210 		map_len = round_page(offset+filsz) - file_addr;
211 
212 	if (map_len != 0) {
213 		vm_object_reference(object);
214 		vm_map_lock(&vmspace->vm_map);
215 		rv = vm_map_insert(&vmspace->vm_map,
216 				      object,
217 				      file_addr,	/* file offset */
218 				      map_addr,		/* virtual start */
219 				      map_addr + map_len,/* virtual end */
220 				      prot,
221 				      VM_PROT_ALL,
222 				      MAP_COPY_ON_WRITE | MAP_PREFAULT);
223 		vm_map_unlock(&vmspace->vm_map);
224 		if (rv != KERN_SUCCESS)
225 			return EINVAL;
226 
227 		/* we can stop now if we've covered it all */
228 		if (memsz == filsz)
229 			return 0;
230 	}
231 
232 
233 	/*
234 	 * We have to get the remaining bit of the file into the first part
235 	 * of the oversized map segment.  This is normally because the .data
236 	 * segment in the file is extended to provide bss.  It's a neat idea
237 	 * to try and save a page, but it's a pain in the behind to implement.
238 	 */
239 	copy_len = (offset + filsz) - trunc_page(offset + filsz);
240 	map_addr = trunc_page((vm_offset_t)vmaddr + filsz);
241 	map_len = round_page((vm_offset_t)vmaddr + memsz) - map_addr;
242 
243 	/* This had damn well better be true! */
244         if (map_len != 0) {
245 		vm_map_lock(&vmspace->vm_map);
246 		rv = vm_map_insert(&vmspace->vm_map, NULL, 0,
247 					map_addr, map_addr + map_len,
248 					VM_PROT_ALL, VM_PROT_ALL, 0);
249 		vm_map_unlock(&vmspace->vm_map);
250 		if (rv != KERN_SUCCESS)
251 			return EINVAL;
252 	}
253 
254 	if (copy_len != 0) {
255 		vm_object_reference(object);
256 		rv = vm_map_find(exec_map,
257 				 object,
258 				 trunc_page(offset + filsz),
259 				 &data_buf,
260 				 PAGE_SIZE,
261 				 TRUE,
262 				 VM_PROT_READ,
263 				 VM_PROT_ALL,
264 				 MAP_COPY_ON_WRITE | MAP_PREFAULT_PARTIAL);
265 		if (rv != KERN_SUCCESS) {
266 			vm_object_deallocate(object);
267 			return EINVAL;
268 		}
269 
270 		/* send the page fragment to user space */
271 		error = copyout((caddr_t)data_buf, (caddr_t)map_addr, copy_len);
272 		vm_map_remove(exec_map, data_buf, data_buf + PAGE_SIZE);
273 		if (error)
274 			return (error);
275 	}
276 
277 	/*
278 	 * set it to the specified protection
279 	 */
280 	vm_map_protect(&vmspace->vm_map, map_addr, map_addr + map_len,  prot,
281 		       FALSE);
282 
283 	return error;
284 }
285 
286 /*
287  * Load the file "file" into memory.  It may be either a shared object
288  * or an executable.
289  *
290  * The "addr" reference parameter is in/out.  On entry, it specifies
291  * the address where a shared object should be loaded.  If the file is
292  * an executable, this value is ignored.  On exit, "addr" specifies
293  * where the file was actually loaded.
294  *
295  * The "entry" reference parameter is out only.  On exit, it specifies
296  * the entry point for the loaded file.
297  */
298 static int
299 elf_load_file(struct proc *p, const char *file, u_long *addr, u_long *entry)
300 {
301 	const Elf_Ehdr *hdr = NULL;
302 	const Elf_Phdr *phdr = NULL;
303 	struct nameidata nd;
304 	struct vmspace *vmspace = p->p_vmspace;
305 	struct vattr attr;
306 	struct image_params image_params, *imgp;
307 	vm_prot_t prot;
308 	u_long rbase;
309 	u_long base_addr = 0;
310 	int error, i, numsegs;
311 
312 	imgp = &image_params;
313 	/*
314 	 * Initialize part of the common data
315 	 */
316 	imgp->proc = p;
317 	imgp->uap = NULL;
318 	imgp->attr = &attr;
319 	imgp->firstpage = NULL;
320 	imgp->image_header = (char *)kmem_alloc_wait(exec_map, PAGE_SIZE);
321 
322 	if (imgp->image_header == NULL) {
323 		nd.ni_vp = NULL;
324 		error = ENOMEM;
325 		goto fail;
326 	}
327 
328         NDINIT(&nd, LOOKUP, LOCKLEAF|FOLLOW, UIO_SYSSPACE, file, p);
329 
330 	if ((error = namei(&nd)) != 0) {
331 		nd.ni_vp = NULL;
332 		goto fail;
333 	}
334 
335 	imgp->vp = nd.ni_vp;
336 
337 	/*
338 	 * Check permissions, modes, uid, etc on the file, and "open" it.
339 	 */
340 	error = exec_check_permissions(imgp);
341 	if (error) {
342 		VOP_UNLOCK(nd.ni_vp, 0, p);
343 		goto fail;
344 	}
345 
346 	error = exec_map_first_page(imgp);
347 	VOP_UNLOCK(nd.ni_vp, 0, p);
348 	if (error)
349                 goto fail;
350 
351 	hdr = (const Elf_Ehdr *)imgp->image_header;
352 	if ((error = elf_check_header(hdr)) != 0)
353 		goto fail;
354 	if (hdr->e_type == ET_DYN)
355 		rbase = *addr;
356 	else if (hdr->e_type == ET_EXEC)
357 		rbase = 0;
358 	else {
359 		error = ENOEXEC;
360 		goto fail;
361 	}
362 
363 	/* Only support headers that fit within first page for now */
364 	if ((hdr->e_phoff > PAGE_SIZE) ||
365 	    (hdr->e_phoff + hdr->e_phentsize * hdr->e_phnum) > PAGE_SIZE) {
366 		error = ENOEXEC;
367 		goto fail;
368 	}
369 
370 	phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff);
371 
372 	for (i = 0, numsegs = 0; i < hdr->e_phnum; i++) {
373 		if (phdr[i].p_type == PT_LOAD) {	/* Loadable segment */
374 			prot = 0;
375 			if (phdr[i].p_flags & PF_X)
376   				prot |= VM_PROT_EXECUTE;
377 			if (phdr[i].p_flags & PF_W)
378   				prot |= VM_PROT_WRITE;
379 			if (phdr[i].p_flags & PF_R)
380   				prot |= VM_PROT_READ;
381 
382 			if ((error = elf_load_section(p, vmspace, nd.ni_vp,
383   						     phdr[i].p_offset,
384   						     (caddr_t)phdr[i].p_vaddr +
385 							rbase,
386   						     phdr[i].p_memsz,
387   						     phdr[i].p_filesz, prot)) != 0)
388 				goto fail;
389 			/*
390 			 * Establish the base address if this is the
391 			 * first segment.
392 			 */
393 			if (numsegs == 0)
394   				base_addr = trunc_page(phdr[i].p_vaddr + rbase);
395 			numsegs++;
396 		}
397 	}
398 	*addr = base_addr;
399 	*entry=(unsigned long)hdr->e_entry + rbase;
400 
401 fail:
402 	if (imgp->firstpage)
403 		exec_unmap_first_page(imgp);
404 	if (imgp->image_header)
405 		kmem_free_wakeup(exec_map, (vm_offset_t)imgp->image_header,
406 			PAGE_SIZE);
407 	if (nd.ni_vp)
408 		vrele(nd.ni_vp);
409 
410 	return error;
411 }
412 
413 static char fallback_elf_brand[MAXBRANDLEN+1] = { "none" };
414 SYSCTL_STRING(_kern, OID_AUTO, fallback_elf_brand, CTLFLAG_RW,
415 		fallback_elf_brand, sizeof(fallback_elf_brand),
416 		"ELF brand of last resort");
417 
418 static int
419 exec_elf_imgact(struct image_params *imgp)
420 {
421 	const Elf_Ehdr *hdr = (const Elf_Ehdr *) imgp->image_header;
422 	const Elf_Phdr *phdr;
423 	Elf_Auxargs *elf_auxargs = NULL;
424 	struct vmspace *vmspace;
425 	vm_prot_t prot;
426 	u_long text_size = 0, data_size = 0;
427 	u_long text_addr = 0, data_addr = 0;
428 	u_long addr, entry = 0, proghdr = 0;
429 	int error, i;
430 	const char *interp = NULL;
431 	Elf_Brandinfo *brand_info;
432 	const char *brand;
433 	char path[MAXPATHLEN];
434 
435 	/*
436 	 * Do we have a valid ELF header ?
437 	 */
438 	if (elf_check_header(hdr) != 0 || hdr->e_type != ET_EXEC)
439 		return -1;
440 
441 	/*
442 	 * From here on down, we return an errno, not -1, as we've
443 	 * detected an ELF file.
444 	 */
445 
446 	if ((hdr->e_phoff > PAGE_SIZE) ||
447 	    (hdr->e_phoff + hdr->e_phentsize * hdr->e_phnum) > PAGE_SIZE) {
448 		/* Only support headers in first page for now */
449 		return ENOEXEC;
450 	}
451 	phdr = (const Elf_Phdr*)(imgp->image_header + hdr->e_phoff);
452 
453 	/*
454 	 * From this point on, we may have resources that need to be freed.
455 	 */
456 	if ((error = exec_extract_strings(imgp)) != 0)
457 		goto fail;
458 
459 	exec_new_vmspace(imgp);
460 
461 	vmspace = imgp->proc->p_vmspace;
462 
463 	for (i = 0; i < hdr->e_phnum; i++) {
464 		switch(phdr[i].p_type) {
465 
466 		case PT_LOAD:	/* Loadable segment */
467 			prot = 0;
468 			if (phdr[i].p_flags & PF_X)
469   				prot |= VM_PROT_EXECUTE;
470 			if (phdr[i].p_flags & PF_W)
471   				prot |= VM_PROT_WRITE;
472 			if (phdr[i].p_flags & PF_R)
473   				prot |= VM_PROT_READ;
474 
475 			if ((error = elf_load_section(imgp->proc,
476 						     vmspace, imgp->vp,
477   						     phdr[i].p_offset,
478   						     (caddr_t)phdr[i].p_vaddr,
479   						     phdr[i].p_memsz,
480   						     phdr[i].p_filesz, prot)) != 0)
481   				goto fail;
482 
483 			/*
484 			 * Is this .text or .data ??
485 			 *
486 			 * We only handle one each of those yet XXX
487 			 */
488 			if (hdr->e_entry >= phdr[i].p_vaddr &&
489 			hdr->e_entry <(phdr[i].p_vaddr+phdr[i].p_memsz)) {
490   				text_addr = trunc_page(phdr[i].p_vaddr);
491   				text_size = round_page(phdr[i].p_memsz +
492 						       phdr[i].p_vaddr -
493 						       text_addr);
494 				entry = (u_long)hdr->e_entry;
495 			} else {
496   				data_addr = trunc_page(phdr[i].p_vaddr);
497   				data_size = round_page(phdr[i].p_memsz +
498 						       phdr[i].p_vaddr -
499 						       data_addr);
500 			}
501 			break;
502 	  	case PT_INTERP:	/* Path to interpreter */
503 			if (phdr[i].p_filesz > MAXPATHLEN ||
504 			    phdr[i].p_offset + phdr[i].p_filesz > PAGE_SIZE) {
505 				error = ENOEXEC;
506 				goto fail;
507 			}
508 			interp = imgp->image_header + phdr[i].p_offset;
509 			break;
510 		case PT_PHDR: 	/* Program header table info */
511 			proghdr = phdr[i].p_vaddr;
512 			break;
513 		default:
514 			break;
515 		}
516 	}
517 
518 	vmspace->vm_tsize = text_size >> PAGE_SHIFT;
519 	vmspace->vm_taddr = (caddr_t)(uintptr_t)text_addr;
520 	vmspace->vm_dsize = data_size >> PAGE_SHIFT;
521 	vmspace->vm_daddr = (caddr_t)(uintptr_t)data_addr;
522 
523 	addr = ELF_RTLD_ADDR(vmspace);
524 
525 	imgp->entry_addr = entry;
526 
527 	/* If the executable has a brand, search for it in the brand list. */
528 	brand_info = NULL;
529 	brand = (const char *)&hdr->e_ident[EI_BRAND];
530 	if (brand[0] != '\0') {
531 		for (i = 0;  i < MAX_BRANDS;  i++) {
532 			Elf_Brandinfo *bi = elf_brand_list[i];
533 
534 			if (bi != NULL && strcmp(brand, bi->brand) == 0) {
535 				brand_info = bi;
536 				break;
537 			}
538 		}
539 	}
540 
541 	/* Lacking a known brand, search for a recognized interpreter. */
542 	if (brand_info == NULL && interp != NULL) {
543 		for (i = 0;  i < MAX_BRANDS;  i++) {
544 			Elf_Brandinfo *bi = elf_brand_list[i];
545 
546 			if (bi != NULL &&
547 			    strcmp(interp, bi->interp_path) == 0) {
548 				brand_info = bi;
549 				break;
550 			}
551 		}
552 	}
553 
554 	/* Lacking a recognized interpreter, try the default brand */
555 	if (brand_info == NULL && fallback_elf_brand[0] != '\0') {
556 		for (i = 0; i < MAX_BRANDS; i++) {
557 			Elf_Brandinfo *bi = elf_brand_list[i];
558 
559 			if (bi != NULL
560 			    && strcmp(fallback_elf_brand, bi->brand) == 0) {
561 				brand_info = bi;
562 				break;
563 			}
564 		}
565 	}
566 
567 #ifdef __alpha__
568 	/* XXX - Assume FreeBSD on the alpha. */
569 	if (brand_info == NULL)
570 		brand_info = &freebsd_brand_info;
571 #endif
572 
573 	if (brand_info == NULL) {
574 		if (brand[0] == 0)
575 			uprintf("ELF binary type not known."
576 			    "  Use \"brandelf\" to brand it.\n");
577 		else
578 			uprintf("ELF binary type \"%.*s\" not known.\n",
579 			    EI_NIDENT - EI_BRAND, brand);
580 		error = ENOEXEC;
581 		goto fail;
582 	}
583 
584 	imgp->proc->p_sysent = brand_info->sysvec;
585 	if (interp != NULL) {
586 	        snprintf(path, sizeof(path), "%s%s",
587 			 brand_info->emul_path, interp);
588 		if ((error = elf_load_file(imgp->proc, path, &addr,
589 					   &imgp->entry_addr)) != 0) {
590 		        if ((error = elf_load_file(imgp->proc, interp, &addr,
591 						   &imgp->entry_addr)) != 0) {
592 			        uprintf("ELF interpreter %s not found\n", path);
593 				goto fail;
594 			}
595                 }
596 	}
597 
598 	/*
599 	 * Construct auxargs table (used by the fixup routine)
600 	 */
601 	elf_auxargs = malloc(sizeof(Elf_Auxargs), M_TEMP, M_WAITOK);
602 	elf_auxargs->execfd = -1;
603 	elf_auxargs->phdr = proghdr;
604 	elf_auxargs->phent = hdr->e_phentsize;
605 	elf_auxargs->phnum = hdr->e_phnum;
606 	elf_auxargs->pagesz = PAGE_SIZE;
607 	elf_auxargs->base = addr;
608 	elf_auxargs->flags = 0;
609 	elf_auxargs->entry = entry;
610 	elf_auxargs->trace = elf_trace;
611 
612 	imgp->auxargs = elf_auxargs;
613 	imgp->interpreted = 0;
614 
615 	/* don't allow modifying the file while we run it */
616 	imgp->vp->v_flag |= VTEXT;
617 
618 fail:
619 	return error;
620 }
621 
622 static int
623 elf_freebsd_fixup(long **stack_base, struct image_params *imgp)
624 {
625 	Elf_Auxargs *args = (Elf_Auxargs *)imgp->auxargs;
626 	long *pos;
627 
628 	pos = *stack_base + (imgp->argc + imgp->envc + 2);
629 
630 	if (args->trace) {
631 		AUXARGS_ENTRY(pos, AT_DEBUG, 1);
632 	}
633 	if (args->execfd != -1) {
634 		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
635 	}
636 	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
637 	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
638 	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
639 	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
640 	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
641 	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
642 	AUXARGS_ENTRY(pos, AT_BASE, args->base);
643 	AUXARGS_ENTRY(pos, AT_NULL, 0);
644 
645 	free(imgp->auxargs, M_TEMP);
646 	imgp->auxargs = NULL;
647 
648 	(*stack_base)--;
649 	suword(*stack_base, (long) imgp->argc);
650 	return 0;
651 }
652 
653 /*
654  * Code for generating ELF core dumps.
655  */
656 
657 typedef void (*segment_callback) __P((vm_map_entry_t, void *));
658 
659 /* Closure for cb_put_phdr(). */
660 struct phdr_closure {
661 	Elf_Phdr *phdr;		/* Program header to fill in */
662 	Elf_Off offset;		/* Offset of segment in core file */
663 };
664 
665 /* Closure for cb_size_segment(). */
666 struct sseg_closure {
667 	int count;		/* Count of writable segments. */
668 	size_t size;		/* Total size of all writable segments. */
669 };
670 
671 static void cb_put_phdr __P((vm_map_entry_t, void *));
672 static void cb_size_segment __P((vm_map_entry_t, void *));
673 static void each_writable_segment __P((struct proc *, segment_callback,
674     void *));
675 static int elf_corehdr __P((struct proc *, struct vnode *, struct ucred *,
676     int, void *, size_t));
677 static void elf_puthdr __P((struct proc *, void *, size_t *,
678     const prstatus_t *, const prfpregset_t *, const prpsinfo_t *, int));
679 static void elf_putnote __P((void *, size_t *, const char *, int,
680     const void *, size_t));
681 
682 extern int osreldate;
683 
684 int
685 elf_coredump(p, vp, limit)
686 	register struct proc *p;
687 	register struct vnode *vp;
688 	off_t limit;
689 {
690 	register struct ucred *cred = p->p_cred->pc_ucred;
691 	int error = 0;
692 	struct sseg_closure seginfo;
693 	void *hdr;
694 	size_t hdrsize;
695 
696 	/* Size the program segments. */
697 	seginfo.count = 0;
698 	seginfo.size = 0;
699 	each_writable_segment(p, cb_size_segment, &seginfo);
700 
701 	/*
702 	 * Calculate the size of the core file header area by making
703 	 * a dry run of generating it.  Nothing is written, but the
704 	 * size is calculated.
705 	 */
706 	hdrsize = 0;
707 	elf_puthdr((struct proc *)NULL, (void *)NULL, &hdrsize,
708 	    (const prstatus_t *)NULL, (const prfpregset_t *)NULL,
709 	    (const prpsinfo_t *)NULL, seginfo.count);
710 
711 	if (hdrsize + seginfo.size >= limit)
712 		return (EFAULT);
713 
714 	/*
715 	 * Allocate memory for building the header, fill it up,
716 	 * and write it out.
717 	 */
718 	hdr = malloc(hdrsize, M_TEMP, M_WAITOK);
719 	if (hdr == NULL) {
720 		return EINVAL;
721 	}
722 	error = elf_corehdr(p, vp, cred, seginfo.count, hdr, hdrsize);
723 
724 	/* Write the contents of all of the writable segments. */
725 	if (error == 0) {
726 		Elf_Phdr *php;
727 		off_t offset;
728 		int i;
729 
730 		php = (Elf_Phdr *)((char *)hdr + sizeof(Elf_Ehdr)) + 1;
731 		offset = hdrsize;
732 		for (i = 0;  i < seginfo.count;  i++) {
733 			error = vn_rdwr(UIO_WRITE, vp, (caddr_t)php->p_vaddr,
734 			    php->p_filesz, offset, UIO_USERSPACE,
735 			    IO_NODELOCKED|IO_UNIT, cred, (int *)NULL, p);
736 			if (error != 0)
737 				break;
738 			offset += php->p_filesz;
739 			php++;
740 		}
741 	}
742 	free(hdr, M_TEMP);
743 
744 	return error;
745 }
746 
747 /*
748  * A callback for each_writable_segment() to write out the segment's
749  * program header entry.
750  */
751 static void
752 cb_put_phdr(entry, closure)
753 	vm_map_entry_t entry;
754 	void *closure;
755 {
756 	struct phdr_closure *phc = (struct phdr_closure *)closure;
757 	Elf_Phdr *phdr = phc->phdr;
758 
759 	phc->offset = round_page(phc->offset);
760 
761 	phdr->p_type = PT_LOAD;
762 	phdr->p_offset = phc->offset;
763 	phdr->p_vaddr = entry->start;
764 	phdr->p_paddr = 0;
765 	phdr->p_filesz = phdr->p_memsz = entry->end - entry->start;
766 	phdr->p_align = PAGE_SIZE;
767 	phdr->p_flags = 0;
768 	if (entry->protection & VM_PROT_READ)
769 		phdr->p_flags |= PF_R;
770 	if (entry->protection & VM_PROT_WRITE)
771 		phdr->p_flags |= PF_W;
772 	if (entry->protection & VM_PROT_EXECUTE)
773 		phdr->p_flags |= PF_X;
774 
775 	phc->offset += phdr->p_filesz;
776 	phc->phdr++;
777 }
778 
779 /*
780  * A callback for each_writable_segment() to gather information about
781  * the number of segments and their total size.
782  */
783 static void
784 cb_size_segment(entry, closure)
785 	vm_map_entry_t entry;
786 	void *closure;
787 {
788 	struct sseg_closure *ssc = (struct sseg_closure *)closure;
789 
790 	ssc->count++;
791 	ssc->size += entry->end - entry->start;
792 }
793 
794 /*
795  * For each writable segment in the process's memory map, call the given
796  * function with a pointer to the map entry and some arbitrary
797  * caller-supplied data.
798  */
799 static void
800 each_writable_segment(p, func, closure)
801 	struct proc *p;
802 	segment_callback func;
803 	void *closure;
804 {
805 	vm_map_t map = &p->p_vmspace->vm_map;
806 	vm_map_entry_t entry;
807 
808 	for (entry = map->header.next;  entry != &map->header;
809 	    entry = entry->next) {
810 		vm_object_t obj;
811 
812 		if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) ||
813 		    (entry->protection & (VM_PROT_READ|VM_PROT_WRITE)) !=
814 		    (VM_PROT_READ|VM_PROT_WRITE))
815 			continue;
816 
817 		if ((obj = entry->object.vm_object) == NULL)
818 			continue;
819 
820 		/* Find the deepest backing object. */
821 		while (obj->backing_object != NULL)
822 			obj = obj->backing_object;
823 
824 		/* Ignore memory-mapped devices and such things. */
825 		if (obj->type != OBJT_DEFAULT &&
826 		    obj->type != OBJT_SWAP &&
827 		    obj->type != OBJT_VNODE)
828 			continue;
829 
830 		(*func)(entry, closure);
831 	}
832 }
833 
834 /*
835  * Write the core file header to the file, including padding up to
836  * the page boundary.
837  */
838 static int
839 elf_corehdr(p, vp, cred, numsegs, hdr, hdrsize)
840 	struct proc *p;
841 	struct vnode *vp;
842 	struct ucred *cred;
843 	int numsegs;
844 	size_t hdrsize;
845 	void *hdr;
846 {
847 	size_t off;
848 	prstatus_t status;
849 	prfpregset_t fpregset;
850 	prpsinfo_t psinfo;
851 
852 	/* Gather the information for the header. */
853 	bzero(&status, sizeof status);
854 	status.pr_version = PRSTATUS_VERSION;
855 	status.pr_statussz = sizeof(prstatus_t);
856 	status.pr_gregsetsz = sizeof(gregset_t);
857 	status.pr_fpregsetsz = sizeof(fpregset_t);
858 	status.pr_osreldate = osreldate;
859 	status.pr_cursig = p->p_sig;
860 	status.pr_pid = p->p_pid;
861 	fill_regs(p, &status.pr_reg);
862 
863 	fill_fpregs(p, &fpregset);
864 
865 	bzero(&psinfo, sizeof psinfo);
866 	psinfo.pr_version = PRPSINFO_VERSION;
867 	psinfo.pr_psinfosz = sizeof(prpsinfo_t);
868 	strncpy(psinfo.pr_fname, p->p_comm, MAXCOMLEN);
869 	/* XXX - We don't fill in the command line arguments properly yet. */
870 	strncpy(psinfo.pr_psargs, p->p_comm, PRARGSZ);
871 
872 	/* Fill in the header. */
873 	bzero(hdr, hdrsize);
874 	off = 0;
875 	elf_puthdr(p, hdr, &off, &status, &fpregset, &psinfo, numsegs);
876 
877 	/* Write it to the core file. */
878 	return vn_rdwr(UIO_WRITE, vp, hdr, hdrsize, (off_t)0,
879 	    UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, cred, NULL, p);
880 }
881 
882 static void
883 elf_puthdr(struct proc *p, void *dst, size_t *off, const prstatus_t *status,
884     const prfpregset_t *fpregset, const prpsinfo_t *psinfo, int numsegs)
885 {
886 	size_t ehoff;
887 	size_t phoff;
888 	size_t noteoff;
889 	size_t notesz;
890 
891 	ehoff = *off;
892 	*off += sizeof(Elf_Ehdr);
893 
894 	phoff = *off;
895 	*off += (numsegs + 1) * sizeof(Elf_Phdr);
896 
897 	noteoff = *off;
898 	elf_putnote(dst, off, "FreeBSD", NT_PRSTATUS, status,
899 	    sizeof *status);
900 	elf_putnote(dst, off, "FreeBSD", NT_FPREGSET, fpregset,
901 	    sizeof *fpregset);
902 	elf_putnote(dst, off, "FreeBSD", NT_PRPSINFO, psinfo,
903 	    sizeof *psinfo);
904 	notesz = *off - noteoff;
905 
906 	/* Align up to a page boundary for the program segments. */
907 	*off = round_page(*off);
908 
909 	if (dst != NULL) {
910 		Elf_Ehdr *ehdr;
911 		Elf_Phdr *phdr;
912 		struct phdr_closure phc;
913 
914 		/*
915 		 * Fill in the ELF header.
916 		 */
917 		ehdr = (Elf_Ehdr *)((char *)dst + ehoff);
918 		ehdr->e_ident[EI_MAG0] = ELFMAG0;
919 		ehdr->e_ident[EI_MAG1] = ELFMAG1;
920 		ehdr->e_ident[EI_MAG2] = ELFMAG2;
921 		ehdr->e_ident[EI_MAG3] = ELFMAG3;
922 		ehdr->e_ident[EI_CLASS] = ELF_CLASS;
923 		ehdr->e_ident[EI_DATA] = ELF_DATA;
924 		ehdr->e_ident[EI_VERSION] = EV_CURRENT;
925 		ehdr->e_ident[EI_PAD] = 0;
926 		strncpy(ehdr->e_ident + EI_BRAND, "FreeBSD",
927 		    EI_NIDENT - EI_BRAND);
928 		ehdr->e_type = ET_CORE;
929 		ehdr->e_machine = ELF_ARCH;
930 		ehdr->e_version = EV_CURRENT;
931 		ehdr->e_entry = 0;
932 		ehdr->e_phoff = phoff;
933 		ehdr->e_flags = 0;
934 		ehdr->e_ehsize = sizeof(Elf_Ehdr);
935 		ehdr->e_phentsize = sizeof(Elf_Phdr);
936 		ehdr->e_phnum = numsegs + 1;
937 		ehdr->e_shentsize = sizeof(Elf_Shdr);
938 		ehdr->e_shnum = 0;
939 		ehdr->e_shstrndx = SHN_UNDEF;
940 
941 		/*
942 		 * Fill in the program header entries.
943 		 */
944 		phdr = (Elf_Phdr *)((char *)dst + phoff);
945 
946 		/* The note segement. */
947 		phdr->p_type = PT_NOTE;
948 		phdr->p_offset = noteoff;
949 		phdr->p_vaddr = 0;
950 		phdr->p_paddr = 0;
951 		phdr->p_filesz = notesz;
952 		phdr->p_memsz = 0;
953 		phdr->p_flags = 0;
954 		phdr->p_align = 0;
955 		phdr++;
956 
957 		/* All the writable segments from the program. */
958 		phc.phdr = phdr;
959 		phc.offset = *off;
960 		each_writable_segment(p, cb_put_phdr, &phc);
961 	}
962 }
963 
964 static void
965 elf_putnote(void *dst, size_t *off, const char *name, int type,
966     const void *desc, size_t descsz)
967 {
968 	Elf_Note note;
969 
970 	note.n_namesz = strlen(name) + 1;
971 	note.n_descsz = descsz;
972 	note.n_type = type;
973 	if (dst != NULL)
974 		bcopy(&note, (char *)dst + *off, sizeof note);
975 	*off += sizeof note;
976 	if (dst != NULL)
977 		bcopy(name, (char *)dst + *off, note.n_namesz);
978 	*off += roundup2(note.n_namesz, sizeof(Elf_Size));
979 	if (dst != NULL)
980 		bcopy(desc, (char *)dst + *off, note.n_descsz);
981 	*off += roundup2(note.n_descsz, sizeof(Elf_Size));
982 }
983 
984 /*
985  * Tell kern_execve.c about it, with a little help from the linker.
986  */
987 static struct execsw elf_execsw = {exec_elf_imgact, "ELF"};
988 EXEC_SET(elf, elf_execsw);
989