xref: /freebsd/sys/kern/imgact_elf.c (revision c4f6a2a9e1b1879b618c436ab4f56ff75c73a0f5)
1 /*-
2  * Copyright (c) 2000 David O'Brien
3  * Copyright (c) 1995-1996 Søren Schmidt
4  * Copyright (c) 1996 Peter Wemm
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer
12  *    in this position and unchanged.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. The name of the author may not be used to endorse or promote products
17  *    derived from this software without specific prior written permission
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  *
30  * $FreeBSD$
31  */
32 
33 #include <sys/param.h>
34 #include <sys/exec.h>
35 #include <sys/fcntl.h>
36 #include <sys/imgact.h>
37 #include <sys/imgact_elf.h>
38 #include <sys/kernel.h>
39 #include <sys/lock.h>
40 #include <sys/malloc.h>
41 #include <sys/mutex.h>
42 #include <sys/mman.h>
43 #include <sys/namei.h>
44 #include <sys/pioctl.h>
45 #include <sys/proc.h>
46 #include <sys/procfs.h>
47 #include <sys/resourcevar.h>
48 #include <sys/systm.h>
49 #include <sys/signalvar.h>
50 #include <sys/stat.h>
51 #include <sys/sx.h>
52 #include <sys/syscall.h>
53 #include <sys/sysctl.h>
54 #include <sys/sysent.h>
55 #include <sys/vnode.h>
56 
57 #include <vm/vm.h>
58 #include <vm/vm_kern.h>
59 #include <vm/vm_param.h>
60 #include <vm/pmap.h>
61 #include <vm/vm_map.h>
62 #include <vm/vm_object.h>
63 #include <vm/vm_extern.h>
64 
65 #include <machine/elf.h>
66 #include <machine/md_var.h>
67 
68 #define OLD_EI_BRAND	8
69 
70 __ElfType(Brandinfo);
71 __ElfType(Auxargs);
72 
73 static int __elfN(check_header)(const Elf_Ehdr *hdr);
74 static int __elfN(load_file)(struct proc *p, const char *file, u_long *addr,
75     u_long *entry, size_t pagesize);
76 static int __elfN(load_section)(struct proc *p,
77     struct vmspace *vmspace, struct vnode *vp, vm_object_t object,
78     vm_offset_t offset, caddr_t vmaddr, size_t memsz, size_t filsz,
79     vm_prot_t prot, size_t pagesize);
80 static int __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp);
81 
82 static int elf_trace = 0;
83 #if __ELF_WORD_SIZE == 32
84 SYSCTL_INT(_debug, OID_AUTO, elf32_trace, CTLFLAG_RW, &elf_trace, 0, "");
85 #else
86 SYSCTL_INT(_debug, OID_AUTO, elf64_trace, CTLFLAG_RW, &elf_trace, 0, "");
87 #endif
88 
89 static Elf_Brandinfo *elf_brand_list[MAX_BRANDS];
90 
91 int
92 __elfN(insert_brand_entry)(Elf_Brandinfo *entry)
93 {
94 	int i;
95 
96 	for (i=0; i<MAX_BRANDS; i++) {
97 		if (elf_brand_list[i] == NULL) {
98 			elf_brand_list[i] = entry;
99 			break;
100 		}
101 	}
102 	if (i == MAX_BRANDS)
103 		return -1;
104 	return 0;
105 }
106 
107 int
108 __elfN(remove_brand_entry)(Elf_Brandinfo *entry)
109 {
110 	int i;
111 
112 	for (i=0; i<MAX_BRANDS; i++) {
113 		if (elf_brand_list[i] == entry) {
114 			elf_brand_list[i] = NULL;
115 			break;
116 		}
117 	}
118 	if (i == MAX_BRANDS)
119 		return -1;
120 	return 0;
121 }
122 
123 int
124 __elfN(brand_inuse)(Elf_Brandinfo *entry)
125 {
126 	struct proc *p;
127 	int rval = FALSE;
128 
129 	sx_slock(&allproc_lock);
130 	LIST_FOREACH(p, &allproc, p_list) {
131 		if (p->p_sysent == entry->sysvec) {
132 			rval = TRUE;
133 			break;
134 		}
135 	}
136 	sx_sunlock(&allproc_lock);
137 
138 	return (rval);
139 }
140 
141 static int
142 __elfN(check_header)(const Elf_Ehdr *hdr)
143 {
144 	int i;
145 
146 	if (!IS_ELF(*hdr) ||
147 	    hdr->e_ident[EI_CLASS] != ELF_TARG_CLASS ||
148 	    hdr->e_ident[EI_DATA] != ELF_TARG_DATA ||
149 	    hdr->e_ident[EI_VERSION] != EV_CURRENT)
150 		return ENOEXEC;
151 
152 	/*
153 	 * Make sure we have at least one brand for this machine.
154 	 */
155 
156 	for (i=0; i<MAX_BRANDS; i++) {
157 		if (elf_brand_list[i]->machine == hdr->e_machine)
158 			break;
159 	}
160 	if (i == MAX_BRANDS)
161 		return ENOEXEC;
162 
163 	if (hdr->e_version != ELF_TARG_VER)
164 		return ENOEXEC;
165 
166 	return 0;
167 }
168 
169 static int
170 __elfN(map_partial)(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
171 	vm_offset_t start, vm_offset_t end, vm_prot_t prot,
172 	vm_prot_t max)
173 {
174 	int error, rv;
175 	vm_offset_t off;
176 	vm_offset_t data_buf = 0;
177 
178 	/*
179 	 * Create the page if it doesn't exist yet. Ignore errors.
180 	 */
181 	vm_map_lock(map);
182 	vm_map_insert(map, NULL, 0, trunc_page(start), round_page(end),
183 		      max, max, 0);
184 	vm_map_unlock(map);
185 
186 	/*
187 	 * Find the page from the underlying object.
188 	 */
189 	if (object) {
190 		vm_object_reference(object);
191 		rv = vm_map_find(exec_map,
192 				 object,
193 				 trunc_page(offset),
194 				 &data_buf,
195 				 PAGE_SIZE,
196 				 TRUE,
197 				 VM_PROT_READ,
198 				 VM_PROT_ALL,
199 				 MAP_COPY_ON_WRITE | MAP_PREFAULT_PARTIAL);
200 		if (rv != KERN_SUCCESS) {
201 			vm_object_deallocate(object);
202 			return rv;
203 		}
204 
205 		off = offset - trunc_page(offset);
206 		error = copyout((caddr_t)data_buf+off, (caddr_t)start, end - start);
207 		vm_map_remove(exec_map, data_buf, data_buf + PAGE_SIZE);
208 		if (error) {
209 			return KERN_FAILURE;
210 		}
211 	}
212 
213 	return KERN_SUCCESS;
214 }
215 
216 static int
217 __elfN(map_insert)(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
218 	vm_offset_t start, vm_offset_t end, vm_prot_t prot,
219 	vm_prot_t max, int cow)
220 {
221 	int rv;
222 
223 	if (start != trunc_page(start)) {
224 		rv = __elfN(map_partial)(map, object, offset,
225 				       start, round_page(start), prot, max);
226 		if (rv)
227 			return rv;
228 		offset += round_page(start) - start;
229 		start = round_page(start);
230 	}
231 	if (end != round_page(end)) {
232 		rv = __elfN(map_partial)(map, object,
233 				       offset + trunc_page(end) - start,
234 				       trunc_page(end), end, prot, max);
235 		if (rv)
236 			return rv;
237 		end = trunc_page(end);
238 	}
239 	if (end > start) {
240 		if (offset & PAGE_MASK) {
241 			vm_offset_t data_buf, off;
242 			vm_size_t sz;
243 			int error;
244 
245 			/*
246 			 * The mapping is not page aligned. This means we have
247 			 * to copy the data. Sigh.
248 			 */
249 			rv = vm_map_find(map, 0, 0,
250 					 &start, end - start,
251 					 FALSE, prot, max, 0);
252 			if (rv)
253 				return rv;
254 			while (start < end) {
255 				vm_object_reference(object);
256 				rv = vm_map_find(exec_map,
257 						 object,
258 						 trunc_page(offset),
259 						 &data_buf,
260 						 2*PAGE_SIZE,
261 						 TRUE,
262 						 VM_PROT_READ,
263 						 VM_PROT_ALL,
264 						 (MAP_COPY_ON_WRITE
265 						  | MAP_PREFAULT_PARTIAL));
266 				if (rv != KERN_SUCCESS) {
267 					vm_object_deallocate(object);
268 					return rv;
269 				}
270 				off = offset - trunc_page(offset);
271 				sz = end - start;
272 				if (sz > PAGE_SIZE)
273 					sz = PAGE_SIZE;
274 				error = copyout((caddr_t)data_buf+off,
275 						(caddr_t)start, sz);
276 				vm_map_remove(exec_map, data_buf,
277 					      data_buf + 2*PAGE_SIZE);
278 				if (error) {
279 					return KERN_FAILURE;
280 				}
281 				start += sz;
282 			}
283 			rv = KERN_SUCCESS;
284 		} else {
285 			vm_map_lock(map);
286 			rv =  vm_map_insert(map, object, offset, start, end,
287 					    prot, max, cow);
288 			vm_map_unlock(map);
289 		}
290 		return rv;
291 	} else {
292 		return KERN_SUCCESS;
293 	}
294 }
295 
296 static int
297 __elfN(load_section)(struct proc *p, struct vmspace *vmspace,
298 	struct vnode *vp, vm_object_t object, vm_offset_t offset,
299 	caddr_t vmaddr, size_t memsz, size_t filsz, vm_prot_t prot,
300 	size_t pagesize)
301 {
302 	size_t map_len;
303 	vm_offset_t map_addr;
304 	int error, rv;
305 	size_t copy_len;
306 	vm_offset_t file_addr;
307 	vm_offset_t data_buf = 0;
308 
309 	GIANT_REQUIRED;
310 
311 	error = 0;
312 
313 	/*
314 	 * It's necessary to fail if the filsz + offset taken from the
315 	 * header is greater than the actual file pager object's size.
316 	 * If we were to allow this, then the vm_map_find() below would
317 	 * walk right off the end of the file object and into the ether.
318 	 *
319 	 * While I'm here, might as well check for something else that
320 	 * is invalid: filsz cannot be greater than memsz.
321 	 */
322 	if ((off_t)filsz + offset > object->un_pager.vnp.vnp_size ||
323 	    filsz > memsz) {
324 		uprintf("elf_load_section: truncated ELF file\n");
325 		return (ENOEXEC);
326 	}
327 
328 #define trunc_page_ps(va, ps)	((va) & ~(ps - 1))
329 #define round_page_ps(va, ps)	(((va) + (ps - 1)) & ~(ps - 1))
330 
331 	map_addr = trunc_page_ps((vm_offset_t)vmaddr, pagesize);
332 	file_addr = trunc_page_ps(offset, pagesize);
333 
334 	/*
335 	 * We have two choices.  We can either clear the data in the last page
336 	 * of an oversized mapping, or we can start the anon mapping a page
337 	 * early and copy the initialized data into that first page.  We
338 	 * choose the second..
339 	 */
340 	if (memsz > filsz)
341 		map_len = trunc_page_ps(offset+filsz, pagesize) - file_addr;
342 	else
343 		map_len = round_page_ps(offset+filsz, pagesize) - file_addr;
344 
345 	if (map_len != 0) {
346 		vm_object_reference(object);
347 		rv = __elfN(map_insert)(&vmspace->vm_map,
348 				      object,
349 				      file_addr,	/* file offset */
350 				      map_addr,		/* virtual start */
351 				      map_addr + map_len,/* virtual end */
352 				      prot,
353 				      VM_PROT_ALL,
354 				      MAP_COPY_ON_WRITE | MAP_PREFAULT);
355 		if (rv != KERN_SUCCESS) {
356 			vm_object_deallocate(object);
357 			return EINVAL;
358 		}
359 
360 		/* we can stop now if we've covered it all */
361 		if (memsz == filsz) {
362 			return 0;
363 		}
364 	}
365 
366 
367 	/*
368 	 * We have to get the remaining bit of the file into the first part
369 	 * of the oversized map segment.  This is normally because the .data
370 	 * segment in the file is extended to provide bss.  It's a neat idea
371 	 * to try and save a page, but it's a pain in the behind to implement.
372 	 */
373 	copy_len = (offset + filsz) - trunc_page_ps(offset + filsz, pagesize);
374 	map_addr = trunc_page_ps((vm_offset_t)vmaddr + filsz, pagesize);
375 	map_len = round_page_ps((vm_offset_t)vmaddr + memsz, pagesize) - map_addr;
376 
377 	/* This had damn well better be true! */
378 	if (map_len != 0) {
379 		rv = __elfN(map_insert)(&vmspace->vm_map, NULL, 0,
380 			map_addr, map_addr + map_len,
381 			VM_PROT_ALL, VM_PROT_ALL, 0);
382 		if (rv != KERN_SUCCESS) {
383 			return EINVAL;
384 		}
385 	}
386 
387 	if (copy_len != 0) {
388 		vm_offset_t off;
389 		vm_object_reference(object);
390 		rv = vm_map_find(exec_map,
391 				 object,
392 				 trunc_page(offset + filsz),
393 				 &data_buf,
394 				 PAGE_SIZE,
395 				 TRUE,
396 				 VM_PROT_READ,
397 				 VM_PROT_ALL,
398 				 MAP_COPY_ON_WRITE | MAP_PREFAULT_PARTIAL);
399 		if (rv != KERN_SUCCESS) {
400 			vm_object_deallocate(object);
401 			return EINVAL;
402 		}
403 
404 		/* send the page fragment to user space */
405 		off = trunc_page_ps(offset + filsz, pagesize)
406 			- trunc_page(offset + filsz);
407 		error = copyout((caddr_t)data_buf+off, (caddr_t)map_addr,
408 			copy_len);
409 		vm_map_remove(exec_map, data_buf, data_buf + PAGE_SIZE);
410 		if (error) {
411 			return (error);
412 		}
413 	}
414 
415 	/*
416 	 * set it to the specified protection.
417 	 * XXX had better undo the damage from pasting over the cracks here!
418 	 */
419 	vm_map_protect(&vmspace->vm_map, trunc_page(map_addr),
420 	    round_page(map_addr + map_len),  prot, FALSE);
421 
422 	return error;
423 }
424 
425 /*
426  * Load the file "file" into memory.  It may be either a shared object
427  * or an executable.
428  *
429  * The "addr" reference parameter is in/out.  On entry, it specifies
430  * the address where a shared object should be loaded.  If the file is
431  * an executable, this value is ignored.  On exit, "addr" specifies
432  * where the file was actually loaded.
433  *
434  * The "entry" reference parameter is out only.  On exit, it specifies
435  * the entry point for the loaded file.
436  */
437 static int
438 __elfN(load_file)(struct proc *p, const char *file, u_long *addr,
439 	u_long *entry, size_t pagesize)
440 {
441 	struct {
442 		struct nameidata nd;
443 		struct vattr attr;
444 		struct image_params image_params;
445 	} *tempdata;
446 	const Elf_Ehdr *hdr = NULL;
447 	const Elf_Phdr *phdr = NULL;
448 	struct nameidata *nd;
449 	struct vmspace *vmspace = p->p_vmspace;
450 	struct vattr *attr;
451 	struct image_params *imgp;
452 	vm_prot_t prot;
453 	u_long rbase;
454 	u_long base_addr = 0;
455 	int error, i, numsegs;
456 
457 	if (curthread->td_proc != p)
458 		panic("elf_load_file - thread");	/* XXXKSE DIAGNOSTIC */
459 
460 	tempdata = malloc(sizeof(*tempdata), M_TEMP, M_WAITOK);
461 	nd = &tempdata->nd;
462 	attr = &tempdata->attr;
463 	imgp = &tempdata->image_params;
464 
465 	/*
466 	 * Initialize part of the common data
467 	 */
468 	imgp->proc = p;
469 	imgp->uap = NULL;
470 	imgp->attr = attr;
471 	imgp->firstpage = NULL;
472 	imgp->image_header = (char *)kmem_alloc_wait(exec_map, PAGE_SIZE);
473 	imgp->object = NULL;
474 
475 	if (imgp->image_header == NULL) {
476 		nd->ni_vp = NULL;
477 		error = ENOMEM;
478 		goto fail;
479 	}
480 
481 	/* XXXKSE */
482 	NDINIT(nd, LOOKUP, LOCKLEAF|FOLLOW, UIO_SYSSPACE, file, curthread);
483 
484 	if ((error = namei(nd)) != 0) {
485 		nd->ni_vp = NULL;
486 		goto fail;
487 	}
488 	NDFREE(nd, NDF_ONLY_PNBUF);
489 	imgp->vp = nd->ni_vp;
490 
491 	/*
492 	 * Check permissions, modes, uid, etc on the file, and "open" it.
493 	 */
494 	error = exec_check_permissions(imgp);
495 	if (error) {
496 		VOP_UNLOCK(nd->ni_vp, 0, curthread); /* XXXKSE */
497 		goto fail;
498 	}
499 
500 	error = exec_map_first_page(imgp);
501 	/*
502 	 * Also make certain that the interpreter stays the same, so set
503 	 * its VV_TEXT flag, too.
504 	 */
505 	if (error == 0)
506 		nd->ni_vp->v_vflag |= VV_TEXT;
507 
508 	VOP_GETVOBJECT(nd->ni_vp, &imgp->object);
509 	vm_object_reference(imgp->object);
510 
511 	VOP_UNLOCK(nd->ni_vp, 0, curthread); /* XXXKSE */
512 	if (error)
513 		goto fail;
514 
515 	hdr = (const Elf_Ehdr *)imgp->image_header;
516 	if ((error = __elfN(check_header)(hdr)) != 0)
517 		goto fail;
518 	if (hdr->e_type == ET_DYN)
519 		rbase = *addr;
520 	else if (hdr->e_type == ET_EXEC)
521 		rbase = 0;
522 	else {
523 		error = ENOEXEC;
524 		goto fail;
525 	}
526 
527 	/* Only support headers that fit within first page for now */
528 	if ((hdr->e_phoff > PAGE_SIZE) ||
529 	    (hdr->e_phoff + hdr->e_phentsize * hdr->e_phnum) > PAGE_SIZE) {
530 		error = ENOEXEC;
531 		goto fail;
532 	}
533 
534 	phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff);
535 
536 	for (i = 0, numsegs = 0; i < hdr->e_phnum; i++) {
537 		if (phdr[i].p_type == PT_LOAD) {	/* Loadable segment */
538 			prot = 0;
539 			if (phdr[i].p_flags & PF_X)
540   				prot |= VM_PROT_EXECUTE;
541 			if (phdr[i].p_flags & PF_W)
542   				prot |= VM_PROT_WRITE;
543 			if (phdr[i].p_flags & PF_R)
544   				prot |= VM_PROT_READ;
545 
546 			if ((error = __elfN(load_section)
547 			     (p, vmspace, nd->ni_vp,
548 			      imgp->object,
549 			      phdr[i].p_offset,
550 			      (caddr_t)(uintptr_t)phdr[i].p_vaddr +
551 			      rbase,
552 			      phdr[i].p_memsz,
553 			      phdr[i].p_filesz, prot, pagesize)) != 0)
554 				goto fail;
555 			/*
556 			 * Establish the base address if this is the
557 			 * first segment.
558 			 */
559 			if (numsegs == 0)
560   				base_addr = trunc_page(phdr[i].p_vaddr + rbase);
561 			numsegs++;
562 		}
563 	}
564 	*addr = base_addr;
565 	*entry=(unsigned long)hdr->e_entry + rbase;
566 
567 fail:
568 	if (imgp->firstpage)
569 		exec_unmap_first_page(imgp);
570 	if (imgp->image_header)
571 		kmem_free_wakeup(exec_map, (vm_offset_t)imgp->image_header,
572 			PAGE_SIZE);
573 	if (imgp->object)
574 		vm_object_deallocate(imgp->object);
575 
576 	if (nd->ni_vp)
577 		vrele(nd->ni_vp);
578 
579 	free(tempdata, M_TEMP);
580 
581 	return error;
582 }
583 
584 extern int fallback_elf_brand;
585 
586 static int
587 __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
588 {
589 	const Elf_Ehdr *hdr = (const Elf_Ehdr *) imgp->image_header;
590 	const Elf_Phdr *phdr;
591 	Elf_Auxargs *elf_auxargs = NULL;
592 	struct vmspace *vmspace;
593 	vm_prot_t prot;
594 	u_long text_size = 0, data_size = 0;
595 	u_long text_addr = 0, data_addr = 0;
596 	u_long addr, entry = 0, proghdr = 0;
597 	vm_offset_t maxuser, usrstack, pagesize;
598 	int error, i;
599 	const char *interp = NULL;
600 	Elf_Brandinfo *brand_info;
601 	char *path;
602 	struct thread *td = curthread;
603 
604 	GIANT_REQUIRED;
605 
606 	/*
607 	 * Do we have a valid ELF header ?
608 	 */
609 	if (__elfN(check_header)(hdr) != 0 || hdr->e_type != ET_EXEC)
610 		return -1;
611 
612 	/*
613 	 * From here on down, we return an errno, not -1, as we've
614 	 * detected an ELF file.
615 	 */
616 
617 	if ((hdr->e_phoff > PAGE_SIZE) ||
618 	    (hdr->e_phoff + hdr->e_phentsize * hdr->e_phnum) > PAGE_SIZE) {
619 		/* Only support headers in first page for now */
620 		return ENOEXEC;
621 	}
622 	phdr = (const Elf_Phdr*)(imgp->image_header + hdr->e_phoff);
623 
624 	/*
625 	 * From this point on, we may have resources that need to be freed.
626 	 */
627 
628 	VOP_UNLOCK(imgp->vp, 0, td);
629 
630 	if ((error = exec_extract_strings(imgp)) != 0)
631 		goto fail;
632 
633 	/*
634 	 * Tentatively identify the brand based on the machine so that
635 	 * we can figure out VM ranges and page sizes.
636 	 */
637 	brand_info = NULL;
638 	for (i = 0;  i < MAX_BRANDS;  i++) {
639 		Elf_Brandinfo *bi = elf_brand_list[i];
640 
641 		if (bi != NULL &&
642 		    hdr->e_machine == bi->machine &&
643 		    (hdr->e_ident[EI_OSABI] == bi->brand
644 		     || 0 ==
645 		     strncmp((const char *)&hdr->e_ident[OLD_EI_BRAND],
646 		     bi->compat_3_brand, strlen(bi->compat_3_brand)))) {
647 			brand_info = bi;
648 			break;
649 		}
650 	}
651 
652 	pagesize = PAGE_SIZE;
653 	maxuser = VM_MAXUSER_ADDRESS;
654 	usrstack = USRSTACK;
655 	if (brand_info) {
656 		if (brand_info->sysvec->sv_pagesize)
657 			pagesize = brand_info->sysvec->sv_pagesize;
658 		if (brand_info->sysvec->sv_maxuser)
659 			maxuser = brand_info->sysvec->sv_maxuser;
660 		if (brand_info->sysvec->sv_usrstack)
661 			usrstack = brand_info->sysvec->sv_usrstack;
662 	}
663 
664 	exec_new_vmspace(imgp, VM_MIN_ADDRESS, maxuser, usrstack);
665 
666 	vmspace = imgp->proc->p_vmspace;
667 
668 	for (i = 0; i < hdr->e_phnum; i++) {
669 		switch(phdr[i].p_type) {
670 
671 		case PT_LOAD:	/* Loadable segment */
672 			prot = 0;
673 			if (phdr[i].p_flags & PF_X)
674   				prot |= VM_PROT_EXECUTE;
675 			if (phdr[i].p_flags & PF_W)
676   				prot |= VM_PROT_WRITE;
677 			if (phdr[i].p_flags & PF_R)
678   				prot |= VM_PROT_READ;
679 
680 #if defined(__ia64__) && __ELF_WORD_SIZE == 32 && defined(IA32_ME_HARDER)
681 			/*
682 			 * Some x86 binaries assume read == executable,
683 			 * notably the M3 runtime and therefore cvsup
684 			 */
685 			if (prot & VM_PROT_READ)
686 				prot |= VM_PROT_EXECUTE;
687 #endif
688 
689 			if ((error = __elfN(load_section)
690 			     (imgp->proc,
691 			      vmspace, imgp->vp,
692 			      imgp->object,
693 			      phdr[i].p_offset,
694 			      (caddr_t)(uintptr_t)phdr[i].p_vaddr,
695 			      phdr[i].p_memsz,
696 			      phdr[i].p_filesz, prot, pagesize)) != 0)
697   				goto fail;
698 
699 			/*
700 			 * Is this .text or .data ??
701 			 *
702 			 * We only handle one each of those yet XXX
703 			 */
704 			if (hdr->e_entry >= phdr[i].p_vaddr &&
705 			hdr->e_entry <(phdr[i].p_vaddr+phdr[i].p_memsz)) {
706   				text_addr = trunc_page(phdr[i].p_vaddr);
707   				text_size = round_page(phdr[i].p_memsz +
708 						       phdr[i].p_vaddr -
709 						       text_addr);
710 				entry = (u_long)hdr->e_entry;
711 			} else {
712   				data_addr = trunc_page(phdr[i].p_vaddr);
713   				data_size = round_page(phdr[i].p_memsz +
714 						       phdr[i].p_vaddr -
715 						       data_addr);
716 			}
717 			break;
718 	  	case PT_INTERP:	/* Path to interpreter */
719 			if (phdr[i].p_filesz > MAXPATHLEN ||
720 			    phdr[i].p_offset + phdr[i].p_filesz > PAGE_SIZE) {
721 				error = ENOEXEC;
722 				goto fail;
723 			}
724 			interp = imgp->image_header + phdr[i].p_offset;
725 			break;
726 		case PT_PHDR: 	/* Program header table info */
727 			proghdr = phdr[i].p_vaddr;
728 			break;
729 		default:
730 			break;
731 		}
732 	}
733 
734 	vmspace->vm_tsize = text_size >> PAGE_SHIFT;
735 	vmspace->vm_taddr = (caddr_t)(uintptr_t)text_addr;
736 	vmspace->vm_dsize = data_size >> PAGE_SHIFT;
737 	vmspace->vm_daddr = (caddr_t)(uintptr_t)data_addr;
738 
739 	addr = ELF_RTLD_ADDR(vmspace);
740 
741 	imgp->entry_addr = entry;
742 
743 	brand_info = NULL;
744 
745 	/* We support three types of branding -- (1) the ELF EI_OSABI field
746 	 * that SCO added to the ELF spec, (2) FreeBSD 3.x's traditional string
747 	 * branding w/in the ELF header, and (3) path of the `interp_path'
748 	 * field.  We should also look for an ".note.ABI-tag" ELF section now
749 	 * in all Linux ELF binaries, FreeBSD 4.1+, and some NetBSD ones.
750 	 */
751 
752 	/* If the executable has a brand, search for it in the brand list. */
753 	if (brand_info == NULL) {
754 		for (i = 0;  i < MAX_BRANDS;  i++) {
755 			Elf_Brandinfo *bi = elf_brand_list[i];
756 
757 			if (bi != NULL &&
758 			    hdr->e_machine == bi->machine &&
759 			    (hdr->e_ident[EI_OSABI] == bi->brand
760 			    || 0 ==
761 			    strncmp((const char *)&hdr->e_ident[OLD_EI_BRAND],
762 			    bi->compat_3_brand, strlen(bi->compat_3_brand)))) {
763 				brand_info = bi;
764 				break;
765 			}
766 		}
767 	}
768 
769 	/* Lacking a known brand, search for a recognized interpreter. */
770 	if (brand_info == NULL && interp != NULL) {
771 		for (i = 0;  i < MAX_BRANDS;  i++) {
772 			Elf_Brandinfo *bi = elf_brand_list[i];
773 
774 			if (bi != NULL &&
775 			    hdr->e_machine == bi->machine &&
776 			    strcmp(interp, bi->interp_path) == 0) {
777 				brand_info = bi;
778 				break;
779 			}
780 		}
781 	}
782 
783 	/* Lacking a recognized interpreter, try the default brand */
784 	if (brand_info == NULL) {
785 		for (i = 0; i < MAX_BRANDS; i++) {
786 			Elf_Brandinfo *bi = elf_brand_list[i];
787 
788 			if (bi != NULL &&
789 			    hdr->e_machine == bi->machine &&
790 			    fallback_elf_brand == bi->brand) {
791 				brand_info = bi;
792 				break;
793 			}
794 		}
795 	}
796 
797 	if (brand_info == NULL) {
798 		uprintf("ELF binary type \"%u\" not known.\n",
799 		    hdr->e_ident[EI_OSABI]);
800 		error = ENOEXEC;
801 		goto fail;
802 	}
803 
804 	imgp->proc->p_sysent = brand_info->sysvec;
805 	if (interp != NULL) {
806 		path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
807 		snprintf(path, MAXPATHLEN, "%s%s",
808 			 brand_info->emul_path, interp);
809 		if ((error = __elfN(load_file)(imgp->proc, path, &addr,
810 					       &imgp->entry_addr,
811 					       pagesize)) != 0) {
812 			if ((error = __elfN(load_file)
813 			     (imgp->proc, interp, &addr,
814 			      &imgp->entry_addr, pagesize)) != 0) {
815 				uprintf("ELF interpreter %s not found\n", path);
816 				free(path, M_TEMP);
817 				goto fail;
818 			}
819 		}
820 		free(path, M_TEMP);
821 	}
822 
823 	/*
824 	 * Construct auxargs table (used by the fixup routine)
825 	 */
826 	elf_auxargs = malloc(sizeof(Elf_Auxargs), M_TEMP, M_WAITOK);
827 	elf_auxargs->execfd = -1;
828 	elf_auxargs->phdr = proghdr;
829 	elf_auxargs->phent = hdr->e_phentsize;
830 	elf_auxargs->phnum = hdr->e_phnum;
831 	elf_auxargs->pagesz = PAGE_SIZE;
832 	elf_auxargs->base = addr;
833 	elf_auxargs->flags = 0;
834 	elf_auxargs->entry = entry;
835 	elf_auxargs->trace = elf_trace;
836 
837 	imgp->auxargs = elf_auxargs;
838 	imgp->interpreted = 0;
839 
840 fail:
841 	vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY, td);
842 	return error;
843 }
844 
845 #if __ELF_WORD_SIZE == 32
846 #define suword	suword32
847 #define stacktype u_int32_t
848 #else
849 #define suword	suword64
850 #define stacktype u_int64_t
851 #endif
852 
853 int
854 __elfN(freebsd_fixup)(register_t **stack_base, struct image_params *imgp)
855 {
856 	Elf_Auxargs *args = (Elf_Auxargs *)imgp->auxargs;
857 	stacktype *base;
858 	stacktype *pos;
859 
860 	base = (stacktype *)*stack_base;
861 	pos = base + (imgp->argc + imgp->envc + 2);
862 
863 	if (args->trace) {
864 		AUXARGS_ENTRY(pos, AT_DEBUG, 1);
865 	}
866 	if (args->execfd != -1) {
867 		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
868 	}
869 	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
870 	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
871 	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
872 	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
873 	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
874 	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
875 	AUXARGS_ENTRY(pos, AT_BASE, args->base);
876 	AUXARGS_ENTRY(pos, AT_NULL, 0);
877 
878 	free(imgp->auxargs, M_TEMP);
879 	imgp->auxargs = NULL;
880 
881 	base--;
882 	suword(base, (long) imgp->argc);
883 	*stack_base = (register_t *)base;
884 	return 0;
885 }
886 
887 /*
888  * Code for generating ELF core dumps.
889  */
890 
891 typedef void (*segment_callback)(vm_map_entry_t, void *);
892 
893 /* Closure for cb_put_phdr(). */
894 struct phdr_closure {
895 	Elf_Phdr *phdr;		/* Program header to fill in */
896 	Elf_Off offset;		/* Offset of segment in core file */
897 };
898 
899 /* Closure for cb_size_segment(). */
900 struct sseg_closure {
901 	int count;		/* Count of writable segments. */
902 	size_t size;		/* Total size of all writable segments. */
903 };
904 
905 static void cb_put_phdr(vm_map_entry_t, void *);
906 static void cb_size_segment(vm_map_entry_t, void *);
907 static void each_writable_segment(struct proc *, segment_callback, void *);
908 static int __elfN(corehdr)(struct thread *, struct vnode *, struct ucred *,
909     int, void *, size_t);
910 static void __elfN(puthdr)(struct proc *, void *, size_t *,
911     const prstatus_t *, const prfpregset_t *, const prpsinfo_t *, int);
912 static void __elfN(putnote)(void *, size_t *, const char *, int,
913     const void *, size_t);
914 
915 extern int osreldate;
916 
917 int
918 __elfN(coredump)(td, vp, limit)
919 	struct thread *td;
920 	register struct vnode *vp;
921 	off_t limit;
922 {
923 	register struct proc *p = td->td_proc;
924 	register struct ucred *cred = td->td_ucred;
925 	int error = 0;
926 	struct sseg_closure seginfo;
927 	void *hdr;
928 	size_t hdrsize;
929 
930 	/* Size the program segments. */
931 	seginfo.count = 0;
932 	seginfo.size = 0;
933 	each_writable_segment(p, cb_size_segment, &seginfo);
934 
935 	/*
936 	 * Calculate the size of the core file header area by making
937 	 * a dry run of generating it.  Nothing is written, but the
938 	 * size is calculated.
939 	 */
940 	hdrsize = 0;
941 	__elfN(puthdr)((struct proc *)NULL, (void *)NULL, &hdrsize,
942 	    (const prstatus_t *)NULL, (const prfpregset_t *)NULL,
943 	    (const prpsinfo_t *)NULL, seginfo.count);
944 
945 	if (hdrsize + seginfo.size >= limit)
946 		return (EFAULT);
947 
948 	/*
949 	 * Allocate memory for building the header, fill it up,
950 	 * and write it out.
951 	 */
952 	hdr = malloc(hdrsize, M_TEMP, M_WAITOK);
953 	if (hdr == NULL) {
954 		return EINVAL;
955 	}
956 	error = __elfN(corehdr)(td, vp, cred, seginfo.count, hdr, hdrsize);
957 
958 	/* Write the contents of all of the writable segments. */
959 	if (error == 0) {
960 		Elf_Phdr *php;
961 		off_t offset;
962 		int i;
963 
964 		php = (Elf_Phdr *)((char *)hdr + sizeof(Elf_Ehdr)) + 1;
965 		offset = hdrsize;
966 		for (i = 0;  i < seginfo.count;  i++) {
967 			error = vn_rdwr_inchunks(UIO_WRITE, vp,
968 			    (caddr_t)(uintptr_t)php->p_vaddr,
969 			    php->p_filesz, offset, UIO_USERSPACE,
970 			    IO_UNIT | IO_DIRECT, cred, NOCRED, (int *)NULL,
971 			    curthread); /* XXXKSE */
972 			if (error != 0)
973 				break;
974 			offset += php->p_filesz;
975 			php++;
976 		}
977 	}
978 	free(hdr, M_TEMP);
979 
980 	return error;
981 }
982 
983 /*
984  * A callback for each_writable_segment() to write out the segment's
985  * program header entry.
986  */
987 static void
988 cb_put_phdr(entry, closure)
989 	vm_map_entry_t entry;
990 	void *closure;
991 {
992 	struct phdr_closure *phc = (struct phdr_closure *)closure;
993 	Elf_Phdr *phdr = phc->phdr;
994 
995 	phc->offset = round_page(phc->offset);
996 
997 	phdr->p_type = PT_LOAD;
998 	phdr->p_offset = phc->offset;
999 	phdr->p_vaddr = entry->start;
1000 	phdr->p_paddr = 0;
1001 	phdr->p_filesz = phdr->p_memsz = entry->end - entry->start;
1002 	phdr->p_align = PAGE_SIZE;
1003 	phdr->p_flags = 0;
1004 	if (entry->protection & VM_PROT_READ)
1005 		phdr->p_flags |= PF_R;
1006 	if (entry->protection & VM_PROT_WRITE)
1007 		phdr->p_flags |= PF_W;
1008 	if (entry->protection & VM_PROT_EXECUTE)
1009 		phdr->p_flags |= PF_X;
1010 
1011 	phc->offset += phdr->p_filesz;
1012 	phc->phdr++;
1013 }
1014 
1015 /*
1016  * A callback for each_writable_segment() to gather information about
1017  * the number of segments and their total size.
1018  */
1019 static void
1020 cb_size_segment(entry, closure)
1021 	vm_map_entry_t entry;
1022 	void *closure;
1023 {
1024 	struct sseg_closure *ssc = (struct sseg_closure *)closure;
1025 
1026 	ssc->count++;
1027 	ssc->size += entry->end - entry->start;
1028 }
1029 
1030 /*
1031  * For each writable segment in the process's memory map, call the given
1032  * function with a pointer to the map entry and some arbitrary
1033  * caller-supplied data.
1034  */
1035 static void
1036 each_writable_segment(p, func, closure)
1037 	struct proc *p;
1038 	segment_callback func;
1039 	void *closure;
1040 {
1041 	vm_map_t map = &p->p_vmspace->vm_map;
1042 	vm_map_entry_t entry;
1043 
1044 	for (entry = map->header.next;  entry != &map->header;
1045 	    entry = entry->next) {
1046 		vm_object_t obj;
1047 
1048 		if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) ||
1049 		    (entry->protection & (VM_PROT_READ|VM_PROT_WRITE)) !=
1050 		    (VM_PROT_READ|VM_PROT_WRITE))
1051 			continue;
1052 
1053 		/*
1054 		** Dont include memory segment in the coredump if
1055 		** MAP_NOCORE is set in mmap(2) or MADV_NOCORE in
1056 		** madvise(2).
1057 		*/
1058 		if (entry->eflags & MAP_ENTRY_NOCOREDUMP)
1059 			continue;
1060 
1061 		if ((obj = entry->object.vm_object) == NULL)
1062 			continue;
1063 
1064 		/* Find the deepest backing object. */
1065 		while (obj->backing_object != NULL)
1066 			obj = obj->backing_object;
1067 
1068 		/* Ignore memory-mapped devices and such things. */
1069 		if (obj->type != OBJT_DEFAULT &&
1070 		    obj->type != OBJT_SWAP &&
1071 		    obj->type != OBJT_VNODE)
1072 			continue;
1073 
1074 		(*func)(entry, closure);
1075 	}
1076 }
1077 
1078 /*
1079  * Write the core file header to the file, including padding up to
1080  * the page boundary.
1081  */
1082 static int
1083 __elfN(corehdr)(td, vp, cred, numsegs, hdr, hdrsize)
1084 	struct thread *td;
1085 	struct vnode *vp;
1086 	struct ucred *cred;
1087 	int numsegs;
1088 	size_t hdrsize;
1089 	void *hdr;
1090 {
1091 	struct {
1092 		prstatus_t status;
1093 		prfpregset_t fpregset;
1094 		prpsinfo_t psinfo;
1095 	} *tempdata;
1096 	struct proc *p = td->td_proc;
1097 	size_t off;
1098 	prstatus_t *status;
1099 	prfpregset_t *fpregset;
1100 	prpsinfo_t *psinfo;
1101 
1102 	tempdata = malloc(sizeof(*tempdata), M_TEMP, M_ZERO | M_WAITOK);
1103 	status = &tempdata->status;
1104 	fpregset = &tempdata->fpregset;
1105 	psinfo = &tempdata->psinfo;
1106 
1107 	/* Gather the information for the header. */
1108 	status->pr_version = PRSTATUS_VERSION;
1109 	status->pr_statussz = sizeof(prstatus_t);
1110 	status->pr_gregsetsz = sizeof(gregset_t);
1111 	status->pr_fpregsetsz = sizeof(fpregset_t);
1112 	status->pr_osreldate = osreldate;
1113 	status->pr_cursig = p->p_sig;
1114 	status->pr_pid = p->p_pid;
1115 	fill_regs(td, &status->pr_reg);
1116 
1117 	fill_fpregs(td, fpregset);
1118 
1119 	psinfo->pr_version = PRPSINFO_VERSION;
1120 	psinfo->pr_psinfosz = sizeof(prpsinfo_t);
1121 	strncpy(psinfo->pr_fname, p->p_comm, sizeof(psinfo->pr_fname) - 1);
1122 
1123 	/* XXX - We don't fill in the command line arguments properly yet. */
1124 	strncpy(psinfo->pr_psargs, p->p_comm, PRARGSZ);
1125 
1126 	/* Fill in the header. */
1127 	bzero(hdr, hdrsize);
1128 	off = 0;
1129 	__elfN(puthdr)(p, hdr, &off, status, fpregset, psinfo, numsegs);
1130 
1131 	free(tempdata, M_TEMP);
1132 
1133 	/* Write it to the core file. */
1134 	return vn_rdwr_inchunks(UIO_WRITE, vp, hdr, hdrsize, (off_t)0,
1135 	    UIO_SYSSPACE, IO_UNIT | IO_DIRECT, cred, NOCRED, NULL,
1136 	    td); /* XXXKSE */
1137 }
1138 
1139 static void
1140 __elfN(puthdr)(struct proc *p, void *dst, size_t *off, const prstatus_t *status,
1141     const prfpregset_t *fpregset, const prpsinfo_t *psinfo, int numsegs)
1142 {
1143 	size_t ehoff;
1144 	size_t phoff;
1145 	size_t noteoff;
1146 	size_t notesz;
1147 
1148 	ehoff = *off;
1149 	*off += sizeof(Elf_Ehdr);
1150 
1151 	phoff = *off;
1152 	*off += (numsegs + 1) * sizeof(Elf_Phdr);
1153 
1154 	noteoff = *off;
1155 	__elfN(putnote)(dst, off, "FreeBSD", NT_PRSTATUS, status,
1156 	    sizeof *status);
1157 	__elfN(putnote)(dst, off, "FreeBSD", NT_FPREGSET, fpregset,
1158 	    sizeof *fpregset);
1159 	__elfN(putnote)(dst, off, "FreeBSD", NT_PRPSINFO, psinfo,
1160 	    sizeof *psinfo);
1161 	notesz = *off - noteoff;
1162 
1163 	/* Align up to a page boundary for the program segments. */
1164 	*off = round_page(*off);
1165 
1166 	if (dst != NULL) {
1167 		Elf_Ehdr *ehdr;
1168 		Elf_Phdr *phdr;
1169 		struct phdr_closure phc;
1170 
1171 		/*
1172 		 * Fill in the ELF header.
1173 		 */
1174 		ehdr = (Elf_Ehdr *)((char *)dst + ehoff);
1175 		ehdr->e_ident[EI_MAG0] = ELFMAG0;
1176 		ehdr->e_ident[EI_MAG1] = ELFMAG1;
1177 		ehdr->e_ident[EI_MAG2] = ELFMAG2;
1178 		ehdr->e_ident[EI_MAG3] = ELFMAG3;
1179 		ehdr->e_ident[EI_CLASS] = ELF_CLASS;
1180 		ehdr->e_ident[EI_DATA] = ELF_DATA;
1181 		ehdr->e_ident[EI_VERSION] = EV_CURRENT;
1182 		ehdr->e_ident[EI_OSABI] = ELFOSABI_FREEBSD;
1183 		ehdr->e_ident[EI_ABIVERSION] = 0;
1184 		ehdr->e_ident[EI_PAD] = 0;
1185 		ehdr->e_type = ET_CORE;
1186 		ehdr->e_machine = ELF_ARCH;
1187 		ehdr->e_version = EV_CURRENT;
1188 		ehdr->e_entry = 0;
1189 		ehdr->e_phoff = phoff;
1190 		ehdr->e_flags = 0;
1191 		ehdr->e_ehsize = sizeof(Elf_Ehdr);
1192 		ehdr->e_phentsize = sizeof(Elf_Phdr);
1193 		ehdr->e_phnum = numsegs + 1;
1194 		ehdr->e_shentsize = sizeof(Elf_Shdr);
1195 		ehdr->e_shnum = 0;
1196 		ehdr->e_shstrndx = SHN_UNDEF;
1197 
1198 		/*
1199 		 * Fill in the program header entries.
1200 		 */
1201 		phdr = (Elf_Phdr *)((char *)dst + phoff);
1202 
1203 		/* The note segement. */
1204 		phdr->p_type = PT_NOTE;
1205 		phdr->p_offset = noteoff;
1206 		phdr->p_vaddr = 0;
1207 		phdr->p_paddr = 0;
1208 		phdr->p_filesz = notesz;
1209 		phdr->p_memsz = 0;
1210 		phdr->p_flags = 0;
1211 		phdr->p_align = 0;
1212 		phdr++;
1213 
1214 		/* All the writable segments from the program. */
1215 		phc.phdr = phdr;
1216 		phc.offset = *off;
1217 		each_writable_segment(p, cb_put_phdr, &phc);
1218 	}
1219 }
1220 
1221 static void
1222 __elfN(putnote)(void *dst, size_t *off, const char *name, int type,
1223     const void *desc, size_t descsz)
1224 {
1225 	Elf_Note note;
1226 
1227 	note.n_namesz = strlen(name) + 1;
1228 	note.n_descsz = descsz;
1229 	note.n_type = type;
1230 	if (dst != NULL)
1231 		bcopy(&note, (char *)dst + *off, sizeof note);
1232 	*off += sizeof note;
1233 	if (dst != NULL)
1234 		bcopy(name, (char *)dst + *off, note.n_namesz);
1235 	*off += roundup2(note.n_namesz, sizeof(Elf_Size));
1236 	if (dst != NULL)
1237 		bcopy(desc, (char *)dst + *off, note.n_descsz);
1238 	*off += roundup2(note.n_descsz, sizeof(Elf_Size));
1239 }
1240 
1241 /*
1242  * Tell kern_execve.c about it, with a little help from the linker.
1243  */
1244 #if __ELF_WORD_SIZE == 32
1245 static struct execsw elf_execsw = {exec_elf32_imgact, "ELF32"};
1246 EXEC_SET(elf32, elf_execsw);
1247 #else
1248 static struct execsw elf_execsw = {exec_elf64_imgact, "ELF64"};
1249 EXEC_SET(elf64, elf_execsw);
1250 #endif
1251