/*-
 * Copyright (c) 2000 David O'Brien
 * Copyright (c) 1995-1996 Søren Schmidt
 * Copyright (c) 1996 Peter Wemm
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/param.h>
#include <sys/exec.h>
#include <sys/fcntl.h>
#include <sys/imgact.h>
#include <sys/imgact_elf.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/mman.h>
#include <sys/namei.h>
#include <sys/pioctl.h>
#include <sys/proc.h>
#include <sys/procfs.h>
#include <sys/resourcevar.h>
#include <sys/systm.h>
#include <sys/signalvar.h>
#include <sys/stat.h>
#include <sys/sx.h>
#include <sys/syscall.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/vnode.h>

#include <vm/vm.h>
#include <vm/vm_kern.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>

#include <machine/elf.h>
#include <machine/md_var.h>

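/*
 * Offset within e_ident where FreeBSD 3.x placed its brand string,
 * predating the standard EI_OSABI byte; see the compat_3_brand
 * matching below.
 */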
#define OLD_EI_BRAND	8

__ElfType(Brandinfo);
__ElfType(Auxargs);

static int __elfN(check_header)(const Elf_Ehdr *hdr);
static int __elfN(load_file)(struct proc *p, const char *file, u_long *addr,
    u_long *entry, size_t pagesize);
static int __elfN(load_section)(struct proc *p,
    struct vmspace *vmspace, struct vnode *vp, vm_object_t object,
    vm_offset_t offset, caddr_t vmaddr, size_t memsz, size_t filsz,
    vm_prot_t prot, size_t pagesize);
static int __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp);

static int elf_trace = 0;
#if __ELF_WORD_SIZE == 32
SYSCTL_INT(_debug, OID_AUTO, elf32_trace, CTLFLAG_RW, &elf_trace, 0, "");
#else
SYSCTL_INT(_debug, OID_AUTO, elf64_trace, CTLFLAG_RW, &elf_trace, 0, "");
#endif

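/*
 * Table of registered ABI brands.  Unused slots are NULL; ABI emulation
 * modules claim and release slots with the insert/remove routines below.
 */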
static Elf_Brandinfo *elf_brand_list[MAX_BRANDS];

int
__elfN(insert_brand_entry)(Elf_Brandinfo *entry)
{
	int i;

	for (i=0; i<MAX_BRANDS; i++) {
		if (elf_brand_list[i] == NULL) {
			elf_brand_list[i] = entry;
			break;
		}
	}
	if (i == MAX_BRANDS)
		return -1;
	return 0;
}

int
__elfN(remove_brand_entry)(Elf_Brandinfo *entry)
{
	int i;

	for (i=0; i<MAX_BRANDS; i++) {
		if (elf_brand_list[i] == entry) {
			elf_brand_list[i] = NULL;
			break;
		}
	}
	if (i == MAX_BRANDS)
		return -1;
	return 0;
}

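/*
 * Report whether any process is still running on the given brand's sysvec,
 * so that an ABI emulation module can refuse to unload while in use.
 */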
int
__elfN(brand_inuse)(Elf_Brandinfo *entry)
{
	struct proc *p;
	int rval = FALSE;

	sx_slock(&allproc_lock);
	LIST_FOREACH(p, &allproc, p_list) {
		if (p->p_sysent == entry->sysvec) {
			rval = TRUE;
			break;
		}
	}
	sx_sunlock(&allproc_lock);

	return (rval);
}

static int
__elfN(check_header)(const Elf_Ehdr *hdr)
{
	int i;

	if (!IS_ELF(*hdr) ||
	    hdr->e_ident[EI_CLASS] != ELF_TARG_CLASS ||
	    hdr->e_ident[EI_DATA] != ELF_TARG_DATA ||
	    hdr->e_ident[EI_VERSION] != EV_CURRENT)
		return ENOEXEC;

	/*
	 * Make sure we have at least one brand for this machine.
	 * Unused brand slots may be NULL, so check before dereferencing.
	 */

	for (i=0; i<MAX_BRANDS; i++) {
		if (elf_brand_list[i] != NULL &&
		    elf_brand_list[i]->machine == hdr->e_machine)
			break;
	}
	if (i == MAX_BRANDS)
		return ENOEXEC;

	if (hdr->e_version != ELF_TARG_VER)
		return ENOEXEC;

	return 0;
}

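/*
 * Map the sub-page range [start, end) of the file object into the target
 * map.  Because the range does not cover whole pages the data is copied:
 * the relevant file page is mapped read-only into exec_map and copyout()
 * pushes the bytes into the (just inserted) anonymous destination page.
 */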
static int
__elfN(map_partial)(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
	vm_offset_t start, vm_offset_t end, vm_prot_t prot,
	vm_prot_t max)
{
	int error, rv;
	vm_offset_t off;
	vm_offset_t data_buf = 0;

	/*
	 * Create the page if it doesn't exist yet. Ignore errors.
	 */
	vm_map_lock(map);
	vm_map_insert(map, NULL, 0, trunc_page(start), round_page(end),
		      max, max, 0);
	vm_map_unlock(map);

	/*
	 * Find the page from the underlying object.
	 */
	if (object) {
		vm_object_reference(object);
		rv = vm_map_find(exec_map,
				 object,
				 trunc_page(offset),
				 &data_buf,
				 PAGE_SIZE,
				 TRUE,
				 VM_PROT_READ,
				 VM_PROT_ALL,
				 MAP_COPY_ON_WRITE | MAP_PREFAULT_PARTIAL);
		if (rv != KERN_SUCCESS) {
			vm_object_deallocate(object);
			return rv;
		}

		off = offset - trunc_page(offset);
		error = copyout((caddr_t)data_buf + off, (caddr_t)start,
		    end - start);
		vm_map_remove(exec_map, data_buf, data_buf + PAGE_SIZE);
		if (error) {
			return KERN_FAILURE;
		}
	}

	return KERN_SUCCESS;
}

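/*
 * Insert the file range starting at "offset" into [start, end) of the
 * target map.  Unaligned head and tail pieces are handed to map_partial();
 * if the file offset itself is not page aligned, the whole range must be
 * copied a page at a time, otherwise the object is mapped directly with
 * vm_map_insert().
 */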
static int
__elfN(map_insert)(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
	vm_offset_t start, vm_offset_t end, vm_prot_t prot,
	vm_prot_t max, int cow)
{
	int rv;

	if (start != trunc_page(start)) {
		rv = __elfN(map_partial)(map, object, offset,
				       start, round_page(start), prot, max);
		if (rv)
			return rv;
		offset += round_page(start) - start;
		start = round_page(start);
	}
	if (end != round_page(end)) {
		rv = __elfN(map_partial)(map, object,
				       offset + trunc_page(end) - start,
				       trunc_page(end), end, prot, max);
		if (rv)
			return rv;
		end = trunc_page(end);
	}
	if (end > start) {
		if (offset & PAGE_MASK) {
			vm_offset_t data_buf, off;
			vm_size_t sz;
			int error;

			/*
			 * The mapping is not page aligned. This means we have
			 * to copy the data. Sigh.
			 */
			rv = vm_map_find(map, 0, 0,
					 &start, end - start,
					 FALSE, prot, max, 0);
			if (rv)
				return rv;
			while (start < end) {
				vm_object_reference(object);
				rv = vm_map_find(exec_map,
						 object,
						 trunc_page(offset),
						 &data_buf,
						 2*PAGE_SIZE,
						 TRUE,
						 VM_PROT_READ,
						 VM_PROT_ALL,
						 (MAP_COPY_ON_WRITE
						  | MAP_PREFAULT_PARTIAL));
				if (rv != KERN_SUCCESS) {
					vm_object_deallocate(object);
					return rv;
				}
				off = offset - trunc_page(offset);
				sz = end - start;
				if (sz > PAGE_SIZE)
					sz = PAGE_SIZE;
				error = copyout((caddr_t)data_buf + off,
						(caddr_t)start, sz);
				vm_map_remove(exec_map, data_buf,
					      data_buf + 2*PAGE_SIZE);
				if (error) {
					return KERN_FAILURE;
				}
				start += sz;
				/* Advance the source along with the
				   destination, or each page would copy
				   the same file data. */
				offset += sz;
			}
			rv = KERN_SUCCESS;
		} else {
			vm_map_lock(map);
			rv = vm_map_insert(map, object, offset, start, end,
					   prot, max, cow);
			vm_map_unlock(map);
		}
		return rv;
	} else {
		return KERN_SUCCESS;
	}
}

static int
__elfN(load_section)(struct proc *p, struct vmspace *vmspace,
	struct vnode *vp, vm_object_t object, vm_offset_t offset,
	caddr_t vmaddr, size_t memsz, size_t filsz, vm_prot_t prot,
	size_t pagesize)
{
	size_t map_len;
	vm_offset_t map_addr;
	int error, rv;
	size_t copy_len;
	vm_offset_t file_addr;
	vm_offset_t data_buf = 0;

	GIANT_REQUIRED;

	error = 0;

	/*
	 * It's necessary to fail if the filsz + offset taken from the
	 * header is greater than the actual file pager object's size.
	 * If we were to allow this, then the vm_map_find() below would
	 * walk right off the end of the file object and into the ether.
	 *
	 * While I'm here, might as well check for something else that
	 * is invalid: filsz cannot be greater than memsz.
	 */
	if ((off_t)filsz + offset > object->un_pager.vnp.vnp_size ||
	    filsz > memsz) {
		uprintf("elf_load_section: truncated ELF file\n");
		return (ENOEXEC);
	}

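/*
 * Page-size-parameterized variants of trunc_page()/round_page(): a brand's
 * sysvec may specify a logical page size (sv_pagesize) larger than the
 * native PAGE_SIZE, and segment rounding has to honor it.
 */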
#define trunc_page_ps(va, ps)	((va) & ~(ps - 1))
#define round_page_ps(va, ps)	(((va) + (ps - 1)) & ~(ps - 1))

	map_addr = trunc_page_ps((vm_offset_t)vmaddr, pagesize);
	file_addr = trunc_page_ps(offset, pagesize);

	/*
	 * We have two choices.  We can either clear the data in the last page
	 * of an oversized mapping, or we can start the anon mapping a page
	 * early and copy the initialized data into that first page.  We
	 * choose the second.
	 */
	if (memsz > filsz)
		map_len = trunc_page_ps(offset + filsz, pagesize) - file_addr;
	else
		map_len = round_page_ps(offset + filsz, pagesize) - file_addr;

	if (map_len != 0) {
		vm_object_reference(object);
		rv = __elfN(map_insert)(&vmspace->vm_map,
				      object,
				      file_addr,	/* file offset */
				      map_addr,		/* virtual start */
				      map_addr + map_len,/* virtual end */
				      prot,
				      VM_PROT_ALL,
				      MAP_COPY_ON_WRITE | MAP_PREFAULT);
		if (rv != KERN_SUCCESS) {
			vm_object_deallocate(object);
			return EINVAL;
		}

		/* we can stop now if we've covered it all */
		if (memsz == filsz) {
			return 0;
		}
	}

	/*
	 * We have to get the remaining bit of the file into the first part
	 * of the oversized map segment.  This is normally because the .data
	 * segment in the file is extended to provide bss.  It's a neat idea
	 * to try and save a page, but it's a pain in the behind to implement.
	 */
	copy_len = (offset + filsz) - trunc_page_ps(offset + filsz, pagesize);
	map_addr = trunc_page_ps((vm_offset_t)vmaddr + filsz, pagesize);
	map_len = round_page_ps((vm_offset_t)vmaddr + memsz, pagesize) -
	    map_addr;

	/* This had damn well better be true! */
	if (map_len != 0) {
		rv = __elfN(map_insert)(&vmspace->vm_map, NULL, 0,
			map_addr, map_addr + map_len,
			VM_PROT_ALL, VM_PROT_ALL, 0);
		if (rv != KERN_SUCCESS) {
			return EINVAL;
		}
	}

	if (copy_len != 0) {
		vm_offset_t off;

		vm_object_reference(object);
		rv = vm_map_find(exec_map,
				 object,
				 trunc_page(offset + filsz),
				 &data_buf,
				 PAGE_SIZE,
				 TRUE,
				 VM_PROT_READ,
				 VM_PROT_ALL,
				 MAP_COPY_ON_WRITE | MAP_PREFAULT_PARTIAL);
		if (rv != KERN_SUCCESS) {
			vm_object_deallocate(object);
			return EINVAL;
		}

		/* send the page fragment to user space */
		off = trunc_page_ps(offset + filsz, pagesize)
			- trunc_page(offset + filsz);
		error = copyout((caddr_t)data_buf + off, (caddr_t)map_addr,
			copy_len);
		vm_map_remove(exec_map, data_buf, data_buf + PAGE_SIZE);
		if (error) {
			return (error);
		}
	}

	/*
	 * Set it to the specified protection.
	 * XXX had better undo the damage from pasting over the cracks here!
	 */
	vm_map_protect(&vmspace->vm_map, trunc_page(map_addr),
	    round_page(map_addr + map_len), prot, FALSE);

	return error;
}

/*
 * Load the file "file" into memory.  It may be either a shared object
 * or an executable.
 *
 * The "addr" reference parameter is in/out.  On entry, it specifies
 * the address where a shared object should be loaded.  If the file is
 * an executable, this value is ignored.  On exit, "addr" specifies
 * where the file was actually loaded.
 *
 * The "entry" reference parameter is out only.  On exit, it specifies
 * the entry point for the loaded file.
 */
static int
__elfN(load_file)(struct proc *p, const char *file, u_long *addr,
	u_long *entry, size_t pagesize)
{
	struct {
		struct nameidata nd;
		struct vattr attr;
		struct image_params image_params;
	} *tempdata;
	const Elf_Ehdr *hdr = NULL;
	const Elf_Phdr *phdr = NULL;
	struct nameidata *nd;
	struct vmspace *vmspace = p->p_vmspace;
	struct vattr *attr;
	struct image_params *imgp;
	vm_prot_t prot;
	u_long rbase;
	u_long base_addr = 0;
	int error, i, numsegs;

	if (curthread->td_proc != p)
		panic("elf_load_file - thread");	/* XXXKSE DIAGNOSTIC */

	tempdata = malloc(sizeof(*tempdata), M_TEMP, M_WAITOK);
	nd = &tempdata->nd;
	attr = &tempdata->attr;
	imgp = &tempdata->image_params;

	/*
	 * Initialize part of the common data
	 */
	imgp->proc = p;
	imgp->uap = NULL;
	imgp->attr = attr;
	imgp->firstpage = NULL;
	imgp->image_header = (char *)kmem_alloc_wait(exec_map, PAGE_SIZE);
	imgp->object = NULL;

	if (imgp->image_header == NULL) {
		nd->ni_vp = NULL;
		error = ENOMEM;
		goto fail;
	}

	/* XXXKSE */
	NDINIT(nd, LOOKUP, LOCKLEAF|FOLLOW, UIO_SYSSPACE, file, curthread);

	if ((error = namei(nd)) != 0) {
		nd->ni_vp = NULL;
		goto fail;
	}
	NDFREE(nd, NDF_ONLY_PNBUF);
	imgp->vp = nd->ni_vp;

	/*
	 * Check permissions, modes, uid, etc on the file, and "open" it.
	 */
	error = exec_check_permissions(imgp);
	if (error) {
		VOP_UNLOCK(nd->ni_vp, 0, curthread); /* XXXKSE */
		goto fail;
	}

	error = exec_map_first_page(imgp);
	/*
	 * Also make certain that the interpreter stays the same, so set
	 * its VTEXT flag, too.
	 */
	if (error == 0)
		nd->ni_vp->v_flag |= VTEXT;
	VOP_GETVOBJECT(nd->ni_vp, &imgp->object);
	vm_object_reference(imgp->object);

	VOP_UNLOCK(nd->ni_vp, 0, curthread); /* XXXKSE */
	if (error)
		goto fail;

	hdr = (const Elf_Ehdr *)imgp->image_header;
	if ((error = __elfN(check_header)(hdr)) != 0)
		goto fail;
	if (hdr->e_type == ET_DYN)
		rbase = *addr;
	else if (hdr->e_type == ET_EXEC)
		rbase = 0;
	else {
		error = ENOEXEC;
		goto fail;
	}

	/* Only support headers that fit within first page for now */
	if ((hdr->e_phoff > PAGE_SIZE) ||
	    (hdr->e_phoff + hdr->e_phentsize * hdr->e_phnum) > PAGE_SIZE) {
		error = ENOEXEC;
		goto fail;
	}

	phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff);

	for (i = 0, numsegs = 0; i < hdr->e_phnum; i++) {
		if (phdr[i].p_type == PT_LOAD) {	/* Loadable segment */
			prot = 0;
			if (phdr[i].p_flags & PF_X)
				prot |= VM_PROT_EXECUTE;
			if (phdr[i].p_flags & PF_W)
				prot |= VM_PROT_WRITE;
			if (phdr[i].p_flags & PF_R)
				prot |= VM_PROT_READ;

			if ((error = __elfN(load_section)
			     (p, vmspace, nd->ni_vp,
			      imgp->object,
			      phdr[i].p_offset,
			      (caddr_t)(uintptr_t)phdr[i].p_vaddr +
			      rbase,
			      phdr[i].p_memsz,
			      phdr[i].p_filesz, prot, pagesize)) != 0)
				goto fail;
			/*
			 * Establish the base address if this is the
			 * first segment.
			 */
			if (numsegs == 0)
				base_addr = trunc_page(phdr[i].p_vaddr + rbase);
			numsegs++;
		}
	}
	*addr = base_addr;
	*entry = (unsigned long)hdr->e_entry + rbase;

fail:
	if (imgp->firstpage)
		exec_unmap_first_page(imgp);
	if (imgp->image_header)
		kmem_free_wakeup(exec_map, (vm_offset_t)imgp->image_header,
			PAGE_SIZE);
	if (imgp->object)
		vm_object_deallocate(imgp->object);

	if (nd->ni_vp)
		vrele(nd->ni_vp);

	free(tempdata, M_TEMP);

	return error;
}

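/*
 * Brand to fall back on when neither the header nor the interpreter
 * identifies the ABI; defined elsewhere and, in stock kernels, exposed
 * as the kern.fallback_elf_brand sysctl.
 */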
extern int fallback_elf_brand;

static int
__CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
{
	const Elf_Ehdr *hdr = (const Elf_Ehdr *)imgp->image_header;
	const Elf_Phdr *phdr;
	Elf_Auxargs *elf_auxargs = NULL;
	struct vmspace *vmspace;
	vm_prot_t prot;
	u_long text_size = 0, data_size = 0;
	u_long text_addr = 0, data_addr = 0;
	u_long addr, entry = 0, proghdr = 0;
	vm_offset_t maxuser, usrstack, pagesize;
	int error, i;
	const char *interp = NULL;
	Elf_Brandinfo *brand_info;
	char *path;

	GIANT_REQUIRED;

	/*
	 * Do we have a valid ELF header ?
	 */
	if (__elfN(check_header)(hdr) != 0 || hdr->e_type != ET_EXEC)
		return -1;

	/*
	 * From here on down, we return an errno, not -1, as we've
	 * detected an ELF file.
	 */

	if ((hdr->e_phoff > PAGE_SIZE) ||
	    (hdr->e_phoff + hdr->e_phentsize * hdr->e_phnum) > PAGE_SIZE) {
		/* Only support headers in first page for now */
		return ENOEXEC;
	}
	phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff);

	/*
	 * From this point on, we may have resources that need to be freed.
	 */

	/*
	 * Yeah, I'm paranoid.  There is every reason in the world to get
	 * VTEXT now since from here on out, there are places we can have
	 * a context switch.  Better safe than sorry; I really don't want
	 * the file to change while it's being loaded.
	 */
	mtx_lock(&imgp->vp->v_interlock);
	imgp->vp->v_flag |= VTEXT;
	mtx_unlock(&imgp->vp->v_interlock);

	if ((error = exec_extract_strings(imgp)) != 0)
		goto fail;

	/*
	 * Tentatively identify the brand based on the machine so that
	 * we can figure out VM ranges and page sizes.
	 */
	brand_info = NULL;
	for (i = 0;  i < MAX_BRANDS;  i++) {
		Elf_Brandinfo *bi = elf_brand_list[i];

		if (bi != NULL &&
		    hdr->e_machine == bi->machine &&
		    (hdr->e_ident[EI_OSABI] == bi->brand
		     || 0 ==
		     strncmp((const char *)&hdr->e_ident[OLD_EI_BRAND],
		     bi->compat_3_brand, strlen(bi->compat_3_brand)))) {
			brand_info = bi;
			break;
		}
	}

	pagesize = PAGE_SIZE;
	maxuser = VM_MAXUSER_ADDRESS;
	usrstack = USRSTACK;
	if (brand_info) {
		if (brand_info->sysvec->sv_pagesize)
			pagesize = brand_info->sysvec->sv_pagesize;
		if (brand_info->sysvec->sv_maxuser)
			maxuser = brand_info->sysvec->sv_maxuser;
		if (brand_info->sysvec->sv_usrstack)
			usrstack = brand_info->sysvec->sv_usrstack;
	}

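	/*
	 * Discard the old address space and build a fresh one sized for
	 * this brand's user VA range and stack location.
	 */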
	exec_new_vmspace(imgp, VM_MIN_ADDRESS, maxuser, usrstack);

	vmspace = imgp->proc->p_vmspace;

	for (i = 0; i < hdr->e_phnum; i++) {
		switch (phdr[i].p_type) {

		case PT_LOAD:	/* Loadable segment */
			prot = 0;
			if (phdr[i].p_flags & PF_X)
				prot |= VM_PROT_EXECUTE;
			if (phdr[i].p_flags & PF_W)
				prot |= VM_PROT_WRITE;
			if (phdr[i].p_flags & PF_R)
				prot |= VM_PROT_READ;

#if defined(__ia64__) && __ELF_WORD_SIZE == 32 && defined(IA32_ME_HARDER)
			/*
			 * Some x86 binaries assume read == executable,
			 * notably the M3 runtime and therefore cvsup
			 */
			if (prot & VM_PROT_READ)
				prot |= VM_PROT_EXECUTE;
#endif

			if ((error = __elfN(load_section)
			     (imgp->proc,
			      vmspace, imgp->vp,
			      imgp->object,
			      phdr[i].p_offset,
			      (caddr_t)(uintptr_t)phdr[i].p_vaddr,
			      phdr[i].p_memsz,
			      phdr[i].p_filesz, prot, pagesize)) != 0)
				goto fail;

			/*
			 * Is this .text or .data?
			 *
			 * We only handle one each of those yet XXX
			 */
			if (hdr->e_entry >= phdr[i].p_vaddr &&
			    hdr->e_entry < (phdr[i].p_vaddr + phdr[i].p_memsz)) {
				text_addr = trunc_page(phdr[i].p_vaddr);
				text_size = round_page(phdr[i].p_memsz +
						       phdr[i].p_vaddr -
						       text_addr);
				entry = (u_long)hdr->e_entry;
			} else {
				data_addr = trunc_page(phdr[i].p_vaddr);
				data_size = round_page(phdr[i].p_memsz +
						       phdr[i].p_vaddr -
						       data_addr);
			}
			break;
		case PT_INTERP:	/* Path to interpreter */
			if (phdr[i].p_filesz > MAXPATHLEN ||
			    phdr[i].p_offset + phdr[i].p_filesz > PAGE_SIZE) {
				error = ENOEXEC;
				goto fail;
			}
			interp = imgp->image_header + phdr[i].p_offset;
			break;
		case PT_PHDR:	/* Program header table info */
			proghdr = phdr[i].p_vaddr;
			break;
		default:
			break;
		}
	}

	vmspace->vm_tsize = text_size >> PAGE_SHIFT;
	vmspace->vm_taddr = (caddr_t)(uintptr_t)text_addr;
	vmspace->vm_dsize = data_size >> PAGE_SHIFT;
	vmspace->vm_daddr = (caddr_t)(uintptr_t)data_addr;

	addr = ELF_RTLD_ADDR(vmspace);

	imgp->entry_addr = entry;

	brand_info = NULL;

	/*
	 * We support three types of branding -- (1) the ELF EI_OSABI field
	 * that SCO added to the ELF spec, (2) FreeBSD 3.x's traditional string
	 * branding w/in the ELF header, and (3) path of the `interp_path'
	 * field.  We should also look for an ".note.ABI-tag" ELF section now
	 * in all Linux ELF binaries, FreeBSD 4.1+, and some NetBSD ones.
	 */

	/* If the executable has a brand, search for it in the brand list. */
	if (brand_info == NULL) {
		for (i = 0;  i < MAX_BRANDS;  i++) {
			Elf_Brandinfo *bi = elf_brand_list[i];

			if (bi != NULL &&
			    hdr->e_machine == bi->machine &&
			    (hdr->e_ident[EI_OSABI] == bi->brand
			    || 0 ==
			    strncmp((const char *)&hdr->e_ident[OLD_EI_BRAND],
			    bi->compat_3_brand, strlen(bi->compat_3_brand)))) {
				brand_info = bi;
				break;
			}
		}
	}

	/* Lacking a known brand, search for a recognized interpreter. */
	if (brand_info == NULL && interp != NULL) {
		for (i = 0;  i < MAX_BRANDS;  i++) {
			Elf_Brandinfo *bi = elf_brand_list[i];

			if (bi != NULL &&
			    hdr->e_machine == bi->machine &&
			    strcmp(interp, bi->interp_path) == 0) {
				brand_info = bi;
				break;
			}
		}
	}

	/* Lacking a recognized interpreter, try the default brand */
	if (brand_info == NULL) {
		for (i = 0; i < MAX_BRANDS; i++) {
			Elf_Brandinfo *bi = elf_brand_list[i];

			if (bi != NULL &&
			    hdr->e_machine == bi->machine &&
			    fallback_elf_brand == bi->brand) {
				brand_info = bi;
				break;
			}
		}
	}

	if (brand_info == NULL) {
		uprintf("ELF binary type \"%u\" not known.\n",
		    hdr->e_ident[EI_OSABI]);
		error = ENOEXEC;
		goto fail;
	}

	imgp->proc->p_sysent = brand_info->sysvec;
	if (interp != NULL) {
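		/*
		 * Try the interpreter under the brand's emulation tree
		 * first; fall back to the literal PT_INTERP path.
		 */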
		path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
		snprintf(path, MAXPATHLEN, "%s%s",
			 brand_info->emul_path, interp);
		if ((error = __elfN(load_file)(imgp->proc, path, &addr,
					       &imgp->entry_addr,
					       pagesize)) != 0) {
			if ((error = __elfN(load_file)
			     (imgp->proc, interp, &addr,
			      &imgp->entry_addr, pagesize)) != 0) {
				uprintf("ELF interpreter %s not found\n", path);
				free(path, M_TEMP);
				goto fail;
			}
		}
		free(path, M_TEMP);
	}

	/*
	 * Construct auxargs table (used by the fixup routine)
	 */
	elf_auxargs = malloc(sizeof(Elf_Auxargs), M_TEMP, M_WAITOK);
	elf_auxargs->execfd = -1;
	elf_auxargs->phdr = proghdr;
	elf_auxargs->phent = hdr->e_phentsize;
	elf_auxargs->phnum = hdr->e_phnum;
	elf_auxargs->pagesz = PAGE_SIZE;
	elf_auxargs->base = addr;
	elf_auxargs->flags = 0;
	elf_auxargs->entry = entry;
	elf_auxargs->trace = elf_trace;

	imgp->auxargs = elf_auxargs;
	imgp->interpreted = 0;

fail:
	return error;
}

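/*
 * Stack words must be written in the executing image's word size, which
 * for 32-bit images on a 64-bit kernel differs from the kernel's own.
 */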
#if __ELF_WORD_SIZE == 32
#define suword	suword32
#define stacktype u_int32_t
#else
#define suword	suword64
#define stacktype u_int64_t
#endif

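/*
 * Place the ELF auxiliary vector on the new process's stack, just past
 * the argument and environment pointer vectors, then push argc one word
 * below the previous stack base.
 */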
int
__elfN(freebsd_fixup)(register_t **stack_base, struct image_params *imgp)
{
	Elf_Auxargs *args = (Elf_Auxargs *)imgp->auxargs;
	stacktype *base;
	stacktype *pos;

	base = (stacktype *)*stack_base;
	pos = base + (imgp->argc + imgp->envc + 2);

	if (args->trace) {
		AUXARGS_ENTRY(pos, AT_DEBUG, 1);
	}
	if (args->execfd != -1) {
		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
	}
	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
	AUXARGS_ENTRY(pos, AT_BASE, args->base);
	AUXARGS_ENTRY(pos, AT_NULL, 0);

	free(imgp->auxargs, M_TEMP);
	imgp->auxargs = NULL;

	base--;
	suword(base, (long)imgp->argc);
	*stack_base = (register_t *)base;
	return 0;
}

/*
 * Code for generating ELF core dumps.
 */

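/*
 * Core file layout: an ELF header, a program header table (one PT_NOTE
 * entry plus one PT_LOAD per writable segment), the note data, and then
 * the segment contents starting on a page boundary.
 */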
typedef void (*segment_callback)(vm_map_entry_t, void *);

/* Closure for cb_put_phdr(). */
struct phdr_closure {
	Elf_Phdr *phdr;		/* Program header to fill in */
	Elf_Off offset;		/* Offset of segment in core file */
};

/* Closure for cb_size_segment(). */
struct sseg_closure {
	int count;		/* Count of writable segments. */
	size_t size;		/* Total size of all writable segments. */
};

static void cb_put_phdr(vm_map_entry_t, void *);
static void cb_size_segment(vm_map_entry_t, void *);
static void each_writable_segment(struct proc *, segment_callback, void *);
static int __elfN(corehdr)(struct thread *, struct vnode *, struct ucred *,
    int, void *, size_t);
static void __elfN(puthdr)(struct proc *, void *, size_t *,
    const prstatus_t *, const prfpregset_t *, const prpsinfo_t *, int);
static void __elfN(putnote)(void *, size_t *, const char *, int,
    const void *, size_t);

extern int osreldate;

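/*
 * Write a core dump to the given vnode: size the header area with a dry
 * run of puthdr(), refuse dumps that would exceed the given limit, then
 * write the headers followed by each writable segment.
 */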
int
__elfN(coredump)(td, vp, limit)
	struct thread *td;
	register struct vnode *vp;
	off_t limit;
{
	register struct proc *p = td->td_proc;
	register struct ucred *cred = td->td_ucred;
	int error = 0;
	struct sseg_closure seginfo;
	void *hdr;
	size_t hdrsize;

	/* Size the program segments. */
	seginfo.count = 0;
	seginfo.size = 0;
	each_writable_segment(p, cb_size_segment, &seginfo);

	/*
	 * Calculate the size of the core file header area by making
	 * a dry run of generating it.  Nothing is written, but the
	 * size is calculated.
	 */
	hdrsize = 0;
	__elfN(puthdr)((struct proc *)NULL, (void *)NULL, &hdrsize,
	    (const prstatus_t *)NULL, (const prfpregset_t *)NULL,
	    (const prpsinfo_t *)NULL, seginfo.count);

	if (hdrsize + seginfo.size >= limit)
		return (EFAULT);

	/*
	 * Allocate memory for building the header, fill it up,
	 * and write it out.
	 */
	hdr = malloc(hdrsize, M_TEMP, M_WAITOK);
	if (hdr == NULL) {
		return EINVAL;
	}
	error = __elfN(corehdr)(td, vp, cred, seginfo.count, hdr, hdrsize);

	/* Write the contents of all of the writable segments. */
	if (error == 0) {
		Elf_Phdr *php;
		off_t offset;
		int i;

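		/* Skip the leading PT_NOTE header; the loadable segments
		   follow it in the program header table. */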
		php = (Elf_Phdr *)((char *)hdr + sizeof(Elf_Ehdr)) + 1;
		offset = hdrsize;
		for (i = 0;  i < seginfo.count;  i++) {
			error = vn_rdwr_inchunks(UIO_WRITE, vp,
			    (caddr_t)(uintptr_t)php->p_vaddr,
			    php->p_filesz, offset, UIO_USERSPACE,
			    IO_UNIT | IO_DIRECT, cred, (int *)NULL,
			    curthread); /* XXXKSE */
			if (error != 0)
				break;
			offset += php->p_filesz;
			php++;
		}
	}
	free(hdr, M_TEMP);

	return error;
}

/*
 * A callback for each_writable_segment() to write out the segment's
 * program header entry.
 */
static void
cb_put_phdr(entry, closure)
	vm_map_entry_t entry;
	void *closure;
{
	struct phdr_closure *phc = (struct phdr_closure *)closure;
	Elf_Phdr *phdr = phc->phdr;

	phc->offset = round_page(phc->offset);

	phdr->p_type = PT_LOAD;
	phdr->p_offset = phc->offset;
	phdr->p_vaddr = entry->start;
	phdr->p_paddr = 0;
	phdr->p_filesz = phdr->p_memsz = entry->end - entry->start;
	phdr->p_align = PAGE_SIZE;
	phdr->p_flags = 0;
	if (entry->protection & VM_PROT_READ)
		phdr->p_flags |= PF_R;
	if (entry->protection & VM_PROT_WRITE)
		phdr->p_flags |= PF_W;
	if (entry->protection & VM_PROT_EXECUTE)
		phdr->p_flags |= PF_X;

	phc->offset += phdr->p_filesz;
	phc->phdr++;
}

/*
 * A callback for each_writable_segment() to gather information about
 * the number of segments and their total size.
 */
static void
cb_size_segment(entry, closure)
	vm_map_entry_t entry;
	void *closure;
{
	struct sseg_closure *ssc = (struct sseg_closure *)closure;

	ssc->count++;
	ssc->size += entry->end - entry->start;
}

/*
 * For each writable segment in the process's memory map, call the given
 * function with a pointer to the map entry and some arbitrary
 * caller-supplied data.
 */
static void
each_writable_segment(p, func, closure)
	struct proc *p;
	segment_callback func;
	void *closure;
{
	vm_map_t map = &p->p_vmspace->vm_map;
	vm_map_entry_t entry;

	for (entry = map->header.next;  entry != &map->header;
	    entry = entry->next) {
		vm_object_t obj;

		if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) ||
		    (entry->protection & (VM_PROT_READ|VM_PROT_WRITE)) !=
		    (VM_PROT_READ|VM_PROT_WRITE))
			continue;

		/*
		 * Don't include the memory segment in the coredump if
		 * MAP_NOCORE is set in mmap(2) or MADV_NOCORE in
		 * madvise(2).
		 */
		if (entry->eflags & MAP_ENTRY_NOCOREDUMP)
			continue;

		if ((obj = entry->object.vm_object) == NULL)
			continue;

		/* Find the deepest backing object. */
		while (obj->backing_object != NULL)
			obj = obj->backing_object;

		/* Ignore memory-mapped devices and such things. */
		if (obj->type != OBJT_DEFAULT &&
		    obj->type != OBJT_SWAP &&
		    obj->type != OBJT_VNODE)
			continue;

		(*func)(entry, closure);
	}
}

/*
 * Write the core file header to the file, including padding up to
 * the page boundary.
 */
static int
__elfN(corehdr)(td, vp, cred, numsegs, hdr, hdrsize)
	struct thread *td;
	struct vnode *vp;
	struct ucred *cred;
	int numsegs;
	void *hdr;
	size_t hdrsize;
{
	struct {
		prstatus_t status;
		prfpregset_t fpregset;
		prpsinfo_t psinfo;
	} *tempdata;
	struct proc *p = td->td_proc;
	size_t off;
	prstatus_t *status;
	prfpregset_t *fpregset;
	prpsinfo_t *psinfo;

	tempdata = malloc(sizeof(*tempdata), M_TEMP, M_ZERO | M_WAITOK);
	status = &tempdata->status;
	fpregset = &tempdata->fpregset;
	psinfo = &tempdata->psinfo;

	/* Gather the information for the header. */
	status->pr_version = PRSTATUS_VERSION;
	status->pr_statussz = sizeof(prstatus_t);
	status->pr_gregsetsz = sizeof(gregset_t);
	status->pr_fpregsetsz = sizeof(fpregset_t);
	status->pr_osreldate = osreldate;
	status->pr_cursig = p->p_sig;
	status->pr_pid = p->p_pid;
	fill_regs(td, &status->pr_reg);

	fill_fpregs(td, fpregset);

	psinfo->pr_version = PRPSINFO_VERSION;
	psinfo->pr_psinfosz = sizeof(prpsinfo_t);
	strncpy(psinfo->pr_fname, p->p_comm, sizeof(psinfo->pr_fname) - 1);

	/* XXX - We don't fill in the command line arguments properly yet. */
	strncpy(psinfo->pr_psargs, p->p_comm, PRARGSZ);

	/* Fill in the header. */
	bzero(hdr, hdrsize);
	off = 0;
	__elfN(puthdr)(p, hdr, &off, status, fpregset, psinfo, numsegs);

	free(tempdata, M_TEMP);

	/* Write it to the core file. */
	return vn_rdwr_inchunks(UIO_WRITE, vp, hdr, hdrsize, (off_t)0,
	    UIO_SYSSPACE, IO_UNIT | IO_DIRECT, cred, NULL, td); /* XXXKSE */
}

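/*
 * Lay out (and, when dst is non-NULL, fill in) the core header area.
 * A first pass with dst == NULL just advances *off to compute the size.
 */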
static void
__elfN(puthdr)(struct proc *p, void *dst, size_t *off, const prstatus_t *status,
    const prfpregset_t *fpregset, const prpsinfo_t *psinfo, int numsegs)
{
	size_t ehoff;
	size_t phoff;
	size_t noteoff;
	size_t notesz;

	ehoff = *off;
	*off += sizeof(Elf_Ehdr);

	phoff = *off;
	*off += (numsegs + 1) * sizeof(Elf_Phdr);

	noteoff = *off;
	__elfN(putnote)(dst, off, "FreeBSD", NT_PRSTATUS, status,
	    sizeof *status);
	__elfN(putnote)(dst, off, "FreeBSD", NT_FPREGSET, fpregset,
	    sizeof *fpregset);
	__elfN(putnote)(dst, off, "FreeBSD", NT_PRPSINFO, psinfo,
	    sizeof *psinfo);
	notesz = *off - noteoff;

	/* Align up to a page boundary for the program segments. */
	*off = round_page(*off);

	if (dst != NULL) {
		Elf_Ehdr *ehdr;
		Elf_Phdr *phdr;
		struct phdr_closure phc;

		/*
		 * Fill in the ELF header.
		 */
		ehdr = (Elf_Ehdr *)((char *)dst + ehoff);
		ehdr->e_ident[EI_MAG0] = ELFMAG0;
		ehdr->e_ident[EI_MAG1] = ELFMAG1;
		ehdr->e_ident[EI_MAG2] = ELFMAG2;
		ehdr->e_ident[EI_MAG3] = ELFMAG3;
		ehdr->e_ident[EI_CLASS] = ELF_CLASS;
		ehdr->e_ident[EI_DATA] = ELF_DATA;
		ehdr->e_ident[EI_VERSION] = EV_CURRENT;
		ehdr->e_ident[EI_OSABI] = ELFOSABI_FREEBSD;
		ehdr->e_ident[EI_ABIVERSION] = 0;
		ehdr->e_ident[EI_PAD] = 0;
		ehdr->e_type = ET_CORE;
		ehdr->e_machine = ELF_ARCH;
		ehdr->e_version = EV_CURRENT;
		ehdr->e_entry = 0;
		ehdr->e_phoff = phoff;
		ehdr->e_flags = 0;
		ehdr->e_ehsize = sizeof(Elf_Ehdr);
		ehdr->e_phentsize = sizeof(Elf_Phdr);
		ehdr->e_phnum = numsegs + 1;
		ehdr->e_shentsize = sizeof(Elf_Shdr);
		ehdr->e_shnum = 0;
		ehdr->e_shstrndx = SHN_UNDEF;

		/*
		 * Fill in the program header entries.
		 */
		phdr = (Elf_Phdr *)((char *)dst + phoff);

		/* The note segment. */
		phdr->p_type = PT_NOTE;
		phdr->p_offset = noteoff;
		phdr->p_vaddr = 0;
		phdr->p_paddr = 0;
		phdr->p_filesz = notesz;
		phdr->p_memsz = 0;
		phdr->p_flags = 0;
		phdr->p_align = 0;
		phdr++;

		/* All the writable segments from the program. */
		phc.phdr = phdr;
		phc.offset = *off;
		each_writable_segment(p, cb_put_phdr, &phc);
	}
}

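/*
 * Append one ELF note record: the Elf_Note header, then the name and
 * descriptor, each padded to Elf_Size alignment.  As with puthdr(), a
 * NULL dst only advances *off.
 */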
static void
__elfN(putnote)(void *dst, size_t *off, const char *name, int type,
    const void *desc, size_t descsz)
{
	Elf_Note note;

	note.n_namesz = strlen(name) + 1;
	note.n_descsz = descsz;
	note.n_type = type;
	if (dst != NULL)
		bcopy(&note, (char *)dst + *off, sizeof note);
	*off += sizeof note;
	if (dst != NULL)
		bcopy(name, (char *)dst + *off, note.n_namesz);
	*off += roundup2(note.n_namesz, sizeof(Elf_Size));
	if (dst != NULL)
		bcopy(desc, (char *)dst + *off, note.n_descsz);
	*off += roundup2(note.n_descsz, sizeof(Elf_Size));
}

/*
 * Tell kern_execve.c about it, with a little help from the linker.
 */
#if __ELF_WORD_SIZE == 32
static struct execsw elf_execsw = {exec_elf32_imgact, "ELF32"};
EXEC_SET(elf32, elf_execsw);
#else
static struct execsw elf_execsw = {exec_elf64_imgact, "ELF64"};
EXEC_SET(elf64, elf_execsw);
#endif