xref: /freebsd/sys/kern/link_elf_obj.c (revision 8ecd87a3e7f5503951d37eab034cb330a1c6ec86)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 1998-2000 Doug Rabson
5  * Copyright (c) 2004 Peter Wemm
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include "opt_ddb.h"
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/fcntl.h>
38 #include <sys/kernel.h>
39 #include <sys/lock.h>
40 #include <sys/malloc.h>
41 #include <sys/linker.h>
42 #include <sys/mutex.h>
43 #include <sys/mount.h>
44 #include <sys/namei.h>
45 #include <sys/proc.h>
46 #include <sys/rwlock.h>
47 #include <sys/vnode.h>
48 
49 #include <machine/elf.h>
50 
51 #include <net/vnet.h>
52 
53 #include <security/mac/mac_framework.h>
54 
55 #include <vm/vm.h>
56 #include <vm/vm_param.h>
57 #include <vm/pmap.h>
58 #include <vm/vm_extern.h>
59 #include <vm/vm_kern.h>
60 #include <vm/vm_map.h>
61 #include <vm/vm_object.h>
62 #include <vm/vm_page.h>
63 #include <vm/vm_pager.h>
64 
65 #include <sys/link_elf.h>
66 
67 #ifdef DDB_CTF
68 #include <contrib/zlib/zlib.h>
69 #endif
70 
71 #include "linker_if.h"
72 
/*
 * One entry per loaded section.  Records where the section's contents live
 * in memory and which section header the entry was built from.
 */
typedef struct {
	void		*addr;	/* Address of the section's contents. */
	Elf_Off		size;	/* Size in bytes, copied from sh_size. */
	int		flags;	/* Section flags. */
	int		sec;	/* Original section number. */
	char		*name;	/* Section name or a "<<...>>" placeholder. */
} Elf_progent;
80 
/* One entry per SHT_REL relocation table that targets a loaded section. */
typedef struct {
	Elf_Rel		*rel;	/* Relocation entries. */
	int		nrel;	/* Entry count: sh_size / sizeof(Elf_Rel). */
	int		sec;	/* Section being relocated (from sh_info). */
} Elf_relent;
86 
/* One entry per SHT_RELA relocation table that targets a loaded section. */
typedef struct {
	Elf_Rela	*rela;	/* Relocation entries (with addends). */
	int		nrela;	/* Entry count: sh_size / sizeof(Elf_Rela). */
	int		sec;	/* Section being relocated (from sh_info). */
} Elf_relaent;
92 
/*
 * Per-file state for a relocatable object module.  The common linker_file
 * is the first member because the code in this file casts a linker_file_t
 * directly to an elf_file_t.
 */
typedef struct elf_file {
	struct linker_file lf;		/* Common fields */

	int		preloaded;	/* Set to 1 by link_elf_link_preload() */
	caddr_t		address;	/* Relocation address */
	vm_object_t	object;		/* VM object to hold file pages */
	Elf_Shdr	*e_shdr;	/* Section header table */

	Elf_progent	*progtab;	/* Loaded sections */
	u_int		nprogtab;	/* Number of entries in progtab */

	Elf_relaent	*relatab;	/* SHT_RELA tables for loaded sections */
	u_int		nrelatab;	/* Number of entries in relatab */

	Elf_relent	*reltab;	/* SHT_REL tables for loaded sections */
	int		nreltab;	/* Number of entries in reltab */

	Elf_Sym		*ddbsymtab;	/* The symbol table we are using */
	long		ddbsymcnt;	/* Number of symbols */
	caddr_t		ddbstrtab;	/* String table */
	long		ddbstrcnt;	/* number of bytes in string table */

	caddr_t		shstrtab;	/* Section name string table */
	long		shstrcnt;	/* number of bytes in string table */

	caddr_t		ctftab;		/* CTF table */
	long		ctfcnt;		/* number of bytes in CTF table */
	caddr_t		ctfoff;		/* CTF offset table */
	caddr_t		typoff;		/* Type offset table */
	long		typlen;		/* Number of type entries. */

} *elf_file_t;
125 
126 #include <kern/kern_ctf.c>
127 
/*
 * Forward declarations of this file's static functions.  Most of them are
 * installed as linker class methods in link_elf_methods[].
 */
static int	link_elf_link_preload(linker_class_t cls,
		    const char *, linker_file_t *);
static int	link_elf_link_preload_finish(linker_file_t);
static int	link_elf_load_file(linker_class_t, const char *, linker_file_t *);
static int	link_elf_lookup_symbol(linker_file_t, const char *,
		    c_linker_sym_t *);
static int	link_elf_symbol_values(linker_file_t, c_linker_sym_t,
		    linker_symval_t *);
static int	link_elf_search_symbol(linker_file_t, caddr_t value,
		    c_linker_sym_t *sym, long *diffp);

static void	link_elf_unload_file(linker_file_t);
static int	link_elf_lookup_set(linker_file_t, const char *,
		    void ***, void ***, int *);
static int	link_elf_each_function_name(linker_file_t,
		    int (*)(const char *, void *), void *);
static int	link_elf_each_function_nameval(linker_file_t,
				linker_function_nameval_callback_t,
				void *);
static int	link_elf_reloc_local(linker_file_t, bool);
static long	link_elf_symtab_get(linker_file_t, const Elf_Sym **);
static long	link_elf_strtab_get(linker_file_t, caddr_t *);

/* Symbol lookup helper; not part of the method table. */
static int	elf_obj_lookup(linker_file_t lf, Elf_Size symidx, int deps,
		    Elf_Addr *);
153 
/*
 * kobj method table for this linker class: maps each linker interface
 * method to its implementation for relocatable object files.
 * NOTE(review): link_elf_ctf_get is not declared above; it presumably comes
 * from the kern_ctf.c include earlier in this file -- confirm.
 */
static kobj_method_t link_elf_methods[] = {
	KOBJMETHOD(linker_lookup_symbol,	link_elf_lookup_symbol),
	KOBJMETHOD(linker_symbol_values,	link_elf_symbol_values),
	KOBJMETHOD(linker_search_symbol,	link_elf_search_symbol),
	KOBJMETHOD(linker_unload,		link_elf_unload_file),
	KOBJMETHOD(linker_load_file,		link_elf_load_file),
	KOBJMETHOD(linker_link_preload,		link_elf_link_preload),
	KOBJMETHOD(linker_link_preload_finish,	link_elf_link_preload_finish),
	KOBJMETHOD(linker_lookup_set,		link_elf_lookup_set),
	KOBJMETHOD(linker_each_function_name,	link_elf_each_function_name),
	KOBJMETHOD(linker_each_function_nameval, link_elf_each_function_nameval),
	KOBJMETHOD(linker_ctf_get,		link_elf_ctf_get),
	KOBJMETHOD(linker_symtab_get, 		link_elf_symtab_get),
	KOBJMETHOD(linker_strtab_get, 		link_elf_strtab_get),
	KOBJMETHOD_END
};
170 
/*
 * The linker class itself.  Its name reflects the target ELF class, and
 * each instance is sized to hold a full struct elf_file.
 */
static struct linker_class link_elf_class = {
#if ELF_TARG_CLASS == ELFCLASS32
	"elf32_obj",
#else
	"elf64_obj",
#endif
	link_elf_methods, sizeof(struct elf_file)
};
179 
/* Further forward declarations of helpers that take an elf_file_t. */
static int	relocate_file(elf_file_t ef);
static void	elf_obj_cleanup_globals_cache(elf_file_t);
182 
/*
 * Print a "kldload:" diagnostic.  The file name is included in the message
 * only when one is supplied.
 */
static void
link_elf_error(const char *filename, const char *s)
{
	if (filename != NULL) {
		printf("kldload: %s: %s\n", filename, s);
		return;
	}
	printf("kldload: %s\n", s);
}
191 
/*
 * Register this file's linker class with the kernel linker.  Run through
 * the SYSINIT below, which passes NULL; the argument is not used.
 */
static void
link_elf_init(void *arg)
{

	linker_add_class(&link_elf_class);
}
SYSINIT(link_elf_obj, SI_SUB_KLD, SI_ORDER_SECOND, link_elf_init, NULL);
199 
200 static void
201 link_elf_protect_range(elf_file_t ef, vm_offset_t start, vm_offset_t end,
202     vm_prot_t prot)
203 {
204 	int error __unused;
205 
206 	KASSERT(start <= end && start >= (vm_offset_t)ef->address &&
207 	    end <= round_page((vm_offset_t)ef->address + ef->lf.size),
208 	    ("link_elf_protect_range: invalid range %#jx-%#jx",
209 	    (uintmax_t)start, (uintmax_t)end));
210 
211 	if (start == end)
212 		return;
213 	if (ef->preloaded) {
214 #ifdef __amd64__
215 		error = pmap_change_prot(start, end - start, prot);
216 		KASSERT(error == 0,
217 		    ("link_elf_protect_range: pmap_change_prot() returned %d",
218 		    error));
219 #endif
220 		return;
221 	}
222 	error = vm_map_protect(kernel_map, start, end, prot, FALSE);
223 	KASSERT(error == KERN_SUCCESS,
224 	    ("link_elf_protect_range: vm_map_protect() returned %d", error));
225 }
226 
227 /*
228  * Restrict permissions on linker file memory based on section flags.
229  * Sections need not be page-aligned, so overlap within a page is possible.
230  */
231 static void
232 link_elf_protect(elf_file_t ef)
233 {
234 	vm_offset_t end, segend, segstart, start;
235 	vm_prot_t gapprot, prot, segprot;
236 	int i;
237 
238 	/*
239 	 * If the file was preloaded, the last page may contain other preloaded
240 	 * data which may need to be writeable.  ELF files are always
241 	 * page-aligned, but other preloaded data, such as entropy or CPU
242 	 * microcode may be loaded with a smaller alignment.
243 	 */
244 	gapprot = ef->preloaded ? VM_PROT_RW : VM_PROT_READ;
245 
246 	start = end = (vm_offset_t)ef->address;
247 	prot = VM_PROT_READ;
248 	for (i = 0; i < ef->nprogtab; i++) {
249 		/*
250 		 * VNET and DPCPU sections have their memory allocated by their
251 		 * respective subsystems.
252 		 */
253 		if (ef->progtab[i].name != NULL && (
254 #ifdef VIMAGE
255 		    strcmp(ef->progtab[i].name, VNET_SETNAME) == 0 ||
256 #endif
257 		    strcmp(ef->progtab[i].name, DPCPU_SETNAME) == 0))
258 			continue;
259 
260 		segstart = trunc_page((vm_offset_t)ef->progtab[i].addr);
261 		segend = round_page((vm_offset_t)ef->progtab[i].addr +
262 		    ef->progtab[i].size);
263 		segprot = VM_PROT_READ;
264 		if ((ef->progtab[i].flags & SHF_WRITE) != 0)
265 			segprot |= VM_PROT_WRITE;
266 		if ((ef->progtab[i].flags & SHF_EXECINSTR) != 0)
267 			segprot |= VM_PROT_EXECUTE;
268 
269 		if (end <= segstart) {
270 			/*
271 			 * Case 1: there is no overlap between the previous
272 			 * segment and this one.  Apply protections to the
273 			 * previous segment, and protect the gap between the
274 			 * previous and current segments, if any.
275 			 */
276 			link_elf_protect_range(ef, start, end, prot);
277 			link_elf_protect_range(ef, end, segstart, gapprot);
278 
279 			start = segstart;
280 			end = segend;
281 			prot = segprot;
282 		} else if (start < segstart && end == segend) {
283 			/*
284 			 * Case 2: the current segment is a subrange of the
285 			 * previous segment.  Apply protections to the
286 			 * non-overlapping portion of the previous segment.
287 			 */
288 			link_elf_protect_range(ef, start, segstart, prot);
289 
290 			start = segstart;
291 			prot |= segprot;
292 		} else if (end < segend) {
293 			/*
294 			 * Case 3: there is partial overlap between the previous
295 			 * and current segments.  Apply protections to the
296 			 * non-overlapping portion of the previous segment, and
297 			 * then the overlap, which must use the union of the two
298 			 * segments' protections.
299 			 */
300 			link_elf_protect_range(ef, start, segstart, prot);
301 			link_elf_protect_range(ef, segstart, end,
302 			    prot | segprot);
303 			start = end;
304 			end = segend;
305 			prot = segprot;
306 		} else {
307 			/*
308 			 * Case 4: the two segments reside in the same page.
309 			 */
310 			prot |= segprot;
311 		}
312 	}
313 
314 	/*
315 	 * Fix up the last unprotected segment and trailing data.
316 	 */
317 	link_elf_protect_range(ef, start, end, prot);
318 	link_elf_protect_range(ef, end,
319 	    round_page((vm_offset_t)ef->address + ef->lf.size), gapprot);
320 }
321 
322 static int
323 link_elf_link_preload(linker_class_t cls, const char *filename,
324     linker_file_t *result)
325 {
326 	Elf_Ehdr *hdr;
327 	Elf_Shdr *shdr;
328 	Elf_Sym *es;
329 	void *modptr, *baseptr, *sizeptr;
330 	char *type;
331 	elf_file_t ef;
332 	linker_file_t lf;
333 	Elf_Addr off;
334 	int error, i, j, pb, ra, rl, shstrindex, symstrindex, symtabindex;
335 
336 	/* Look to see if we have the file preloaded */
337 	modptr = preload_search_by_name(filename);
338 	if (modptr == NULL)
339 		return ENOENT;
340 
341 	type = (char *)preload_search_info(modptr, MODINFO_TYPE);
342 	baseptr = preload_search_info(modptr, MODINFO_ADDR);
343 	sizeptr = preload_search_info(modptr, MODINFO_SIZE);
344 	hdr = (Elf_Ehdr *)preload_search_info(modptr, MODINFO_METADATA |
345 	    MODINFOMD_ELFHDR);
346 	shdr = (Elf_Shdr *)preload_search_info(modptr, MODINFO_METADATA |
347 	    MODINFOMD_SHDR);
348 	if (type == NULL || (strcmp(type, "elf" __XSTRING(__ELF_WORD_SIZE)
349 	    " obj module") != 0 &&
350 	    strcmp(type, "elf obj module") != 0)) {
351 		return (EFTYPE);
352 	}
353 	if (baseptr == NULL || sizeptr == NULL || hdr == NULL ||
354 	    shdr == NULL)
355 		return (EINVAL);
356 
357 	lf = linker_make_file(filename, &link_elf_class);
358 	if (lf == NULL)
359 		return (ENOMEM);
360 
361 	ef = (elf_file_t)lf;
362 	ef->preloaded = 1;
363 	ef->address = *(caddr_t *)baseptr;
364 	lf->address = *(caddr_t *)baseptr;
365 	lf->size = *(size_t *)sizeptr;
366 
367 	if (hdr->e_ident[EI_CLASS] != ELF_TARG_CLASS ||
368 	    hdr->e_ident[EI_DATA] != ELF_TARG_DATA ||
369 	    hdr->e_ident[EI_VERSION] != EV_CURRENT ||
370 	    hdr->e_version != EV_CURRENT ||
371 	    hdr->e_type != ET_REL ||
372 	    hdr->e_machine != ELF_TARG_MACH) {
373 		error = EFTYPE;
374 		goto out;
375 	}
376 	ef->e_shdr = shdr;
377 
378 	/* Scan the section header for information and table sizing. */
379 	symtabindex = -1;
380 	symstrindex = -1;
381 	for (i = 0; i < hdr->e_shnum; i++) {
382 		switch (shdr[i].sh_type) {
383 		case SHT_PROGBITS:
384 		case SHT_NOBITS:
385 #ifdef __amd64__
386 		case SHT_X86_64_UNWIND:
387 #endif
388 			/* Ignore sections not loaded by the loader. */
389 			if (shdr[i].sh_addr == 0)
390 				break;
391 			ef->nprogtab++;
392 			break;
393 		case SHT_SYMTAB:
394 			symtabindex = i;
395 			symstrindex = shdr[i].sh_link;
396 			break;
397 		case SHT_REL:
398 			/*
399 			 * Ignore relocation tables for sections not
400 			 * loaded by the loader.
401 			 */
402 			if (shdr[shdr[i].sh_info].sh_addr == 0)
403 				break;
404 			ef->nreltab++;
405 			break;
406 		case SHT_RELA:
407 			if (shdr[shdr[i].sh_info].sh_addr == 0)
408 				break;
409 			ef->nrelatab++;
410 			break;
411 		}
412 	}
413 
414 	shstrindex = hdr->e_shstrndx;
415 	if (ef->nprogtab == 0 || symstrindex < 0 ||
416 	    symstrindex >= hdr->e_shnum ||
417 	    shdr[symstrindex].sh_type != SHT_STRTAB || shstrindex == 0 ||
418 	    shstrindex >= hdr->e_shnum ||
419 	    shdr[shstrindex].sh_type != SHT_STRTAB) {
420 		printf("%s: bad/missing section headers\n", filename);
421 		error = ENOEXEC;
422 		goto out;
423 	}
424 
425 	/* Allocate space for tracking the load chunks */
426 	if (ef->nprogtab != 0)
427 		ef->progtab = malloc(ef->nprogtab * sizeof(*ef->progtab),
428 		    M_LINKER, M_WAITOK | M_ZERO);
429 	if (ef->nreltab != 0)
430 		ef->reltab = malloc(ef->nreltab * sizeof(*ef->reltab),
431 		    M_LINKER, M_WAITOK | M_ZERO);
432 	if (ef->nrelatab != 0)
433 		ef->relatab = malloc(ef->nrelatab * sizeof(*ef->relatab),
434 		    M_LINKER, M_WAITOK | M_ZERO);
435 	if ((ef->nprogtab != 0 && ef->progtab == NULL) ||
436 	    (ef->nreltab != 0 && ef->reltab == NULL) ||
437 	    (ef->nrelatab != 0 && ef->relatab == NULL)) {
438 		error = ENOMEM;
439 		goto out;
440 	}
441 
442 	/* XXX, relocate the sh_addr fields saved by the loader. */
443 	off = 0;
444 	for (i = 0; i < hdr->e_shnum; i++) {
445 		if (shdr[i].sh_addr != 0 && (off == 0 || shdr[i].sh_addr < off))
446 			off = shdr[i].sh_addr;
447 	}
448 	for (i = 0; i < hdr->e_shnum; i++) {
449 		if (shdr[i].sh_addr != 0)
450 			shdr[i].sh_addr = shdr[i].sh_addr - off +
451 			    (Elf_Addr)ef->address;
452 	}
453 
454 	ef->ddbsymcnt = shdr[symtabindex].sh_size / sizeof(Elf_Sym);
455 	ef->ddbsymtab = (Elf_Sym *)shdr[symtabindex].sh_addr;
456 	ef->ddbstrcnt = shdr[symstrindex].sh_size;
457 	ef->ddbstrtab = (char *)shdr[symstrindex].sh_addr;
458 	ef->shstrcnt = shdr[shstrindex].sh_size;
459 	ef->shstrtab = (char *)shdr[shstrindex].sh_addr;
460 
461 	/* Now fill out progtab and the relocation tables. */
462 	pb = 0;
463 	rl = 0;
464 	ra = 0;
465 	for (i = 0; i < hdr->e_shnum; i++) {
466 		switch (shdr[i].sh_type) {
467 		case SHT_PROGBITS:
468 		case SHT_NOBITS:
469 #ifdef __amd64__
470 		case SHT_X86_64_UNWIND:
471 #endif
472 			if (shdr[i].sh_addr == 0)
473 				break;
474 			ef->progtab[pb].addr = (void *)shdr[i].sh_addr;
475 			if (shdr[i].sh_type == SHT_PROGBITS)
476 				ef->progtab[pb].name = "<<PROGBITS>>";
477 #ifdef __amd64__
478 			else if (shdr[i].sh_type == SHT_X86_64_UNWIND)
479 				ef->progtab[pb].name = "<<UNWIND>>";
480 #endif
481 			else
482 				ef->progtab[pb].name = "<<NOBITS>>";
483 			ef->progtab[pb].size = shdr[i].sh_size;
484 			ef->progtab[pb].flags = shdr[i].sh_flags;
485 			ef->progtab[pb].sec = i;
486 			if (ef->shstrtab && shdr[i].sh_name != 0)
487 				ef->progtab[pb].name =
488 				    ef->shstrtab + shdr[i].sh_name;
489 			if (ef->progtab[pb].name != NULL &&
490 			    !strcmp(ef->progtab[pb].name, DPCPU_SETNAME)) {
491 				void *dpcpu;
492 
493 				dpcpu = dpcpu_alloc(shdr[i].sh_size);
494 				if (dpcpu == NULL) {
495 					printf("%s: pcpu module space is out "
496 					    "of space; cannot allocate %#jx "
497 					    "for %s\n", __func__,
498 					    (uintmax_t)shdr[i].sh_size,
499 					    filename);
500 					error = ENOSPC;
501 					goto out;
502 				}
503 				memcpy(dpcpu, ef->progtab[pb].addr,
504 				    ef->progtab[pb].size);
505 				dpcpu_copy(dpcpu, shdr[i].sh_size);
506 				ef->progtab[pb].addr = dpcpu;
507 #ifdef VIMAGE
508 			} else if (ef->progtab[pb].name != NULL &&
509 			    !strcmp(ef->progtab[pb].name, VNET_SETNAME)) {
510 				void *vnet_data;
511 
512 				vnet_data = vnet_data_alloc(shdr[i].sh_size);
513 				if (vnet_data == NULL) {
514 					printf("%s: vnet module space is out "
515 					    "of space; cannot allocate %#jx "
516 					    "for %s\n", __func__,
517 					    (uintmax_t)shdr[i].sh_size,
518 					    filename);
519 					error = ENOSPC;
520 					goto out;
521 				}
522 				memcpy(vnet_data, ef->progtab[pb].addr,
523 				    ef->progtab[pb].size);
524 				vnet_data_copy(vnet_data, shdr[i].sh_size);
525 				ef->progtab[pb].addr = vnet_data;
526 #endif
527 			} else if (ef->progtab[pb].name != NULL &&
528 			    !strcmp(ef->progtab[pb].name, ".ctors")) {
529 				lf->ctors_addr = ef->progtab[pb].addr;
530 				lf->ctors_size = shdr[i].sh_size;
531 			}
532 
533 			/* Update all symbol values with the offset. */
534 			for (j = 0; j < ef->ddbsymcnt; j++) {
535 				es = &ef->ddbsymtab[j];
536 				if (es->st_shndx != i)
537 					continue;
538 				es->st_value += (Elf_Addr)ef->progtab[pb].addr;
539 			}
540 			pb++;
541 			break;
542 		case SHT_REL:
543 			if (shdr[shdr[i].sh_info].sh_addr == 0)
544 				break;
545 			ef->reltab[rl].rel = (Elf_Rel *)shdr[i].sh_addr;
546 			ef->reltab[rl].nrel = shdr[i].sh_size / sizeof(Elf_Rel);
547 			ef->reltab[rl].sec = shdr[i].sh_info;
548 			rl++;
549 			break;
550 		case SHT_RELA:
551 			if (shdr[shdr[i].sh_info].sh_addr == 0)
552 				break;
553 			ef->relatab[ra].rela = (Elf_Rela *)shdr[i].sh_addr;
554 			ef->relatab[ra].nrela =
555 			    shdr[i].sh_size / sizeof(Elf_Rela);
556 			ef->relatab[ra].sec = shdr[i].sh_info;
557 			ra++;
558 			break;
559 		}
560 	}
561 	if (pb != ef->nprogtab) {
562 		printf("%s: lost progbits\n", filename);
563 		error = ENOEXEC;
564 		goto out;
565 	}
566 	if (rl != ef->nreltab) {
567 		printf("%s: lost reltab\n", filename);
568 		error = ENOEXEC;
569 		goto out;
570 	}
571 	if (ra != ef->nrelatab) {
572 		printf("%s: lost relatab\n", filename);
573 		error = ENOEXEC;
574 		goto out;
575 	}
576 
577 	/*
578 	 * The file needs to be writeable and executable while applying
579 	 * relocations.  Mapping protections are applied once relocation
580 	 * processing is complete.
581 	 */
582 	link_elf_protect_range(ef, (vm_offset_t)ef->address,
583 	    round_page((vm_offset_t)ef->address + ef->lf.size), VM_PROT_ALL);
584 
585 	/* Local intra-module relocations */
586 	error = link_elf_reloc_local(lf, false);
587 	if (error != 0)
588 		goto out;
589 	*result = lf;
590 	return (0);
591 
592 out:
593 	/* preload not done this way */
594 	linker_file_unload(lf, LINKER_UNLOAD_FORCE);
595 	return (error);
596 }
597 
/*
 * Call every non-NULL constructor in a table of "void (*)(void)" function
 * pointers.
 *
 *   addr - start of the table; nothing is done when NULL.
 *   size - size of the table in bytes; nothing is done when 0.  A trailing
 *          partial entry is ignored.
 */
static void
link_elf_invoke_ctors(caddr_t addr, size_t size)
{
	typedef void (*ctor_fn)(void);
	ctor_fn *next, *limit;

	if (addr == NULL || size == 0)
		return;

	next = (void *)addr;
	limit = next + size / sizeof(*next);
	for (; next != limit; next++) {
		if (*next != NULL)
			(**next)();
	}
}
613 
614 static int
615 link_elf_link_preload_finish(linker_file_t lf)
616 {
617 	elf_file_t ef;
618 	int error;
619 
620 	ef = (elf_file_t)lf;
621 	error = relocate_file(ef);
622 	if (error)
623 		return (error);
624 
625 	/* Notify MD code that a module is being loaded. */
626 	error = elf_cpu_load_file(lf);
627 	if (error)
628 		return (error);
629 
630 #if defined(__i386__) || defined(__amd64__)
631 	/* Now ifuncs. */
632 	error = link_elf_reloc_local(lf, true);
633 	if (error != 0)
634 		return (error);
635 #endif
636 
637 	/* Apply protections now that relocation processing is complete. */
638 	link_elf_protect(ef);
639 
640 	link_elf_invoke_ctors(lf->ctors_addr, lf->ctors_size);
641 	return (0);
642 }
643 
644 static int
645 link_elf_load_file(linker_class_t cls, const char *filename,
646     linker_file_t *result)
647 {
648 	struct nameidata *nd;
649 	struct thread *td = curthread;	/* XXX */
650 	Elf_Ehdr *hdr;
651 	Elf_Shdr *shdr;
652 	Elf_Sym *es;
653 	int nbytes, i, j;
654 	vm_offset_t mapbase;
655 	size_t mapsize;
656 	int error = 0;
657 	ssize_t resid;
658 	int flags;
659 	elf_file_t ef;
660 	linker_file_t lf;
661 	int symtabindex;
662 	int symstrindex;
663 	int shstrindex;
664 	int nsym;
665 	int pb, rl, ra;
666 	int alignmask;
667 
668 	shdr = NULL;
669 	lf = NULL;
670 	mapsize = 0;
671 	hdr = NULL;
672 
673 	nd = malloc(sizeof(struct nameidata), M_TEMP, M_WAITOK);
674 	NDINIT(nd, LOOKUP, FOLLOW, UIO_SYSSPACE, filename, td);
675 	flags = FREAD;
676 	error = vn_open(nd, &flags, 0, NULL);
677 	if (error) {
678 		free(nd, M_TEMP);
679 		return error;
680 	}
681 	NDFREE(nd, NDF_ONLY_PNBUF);
682 	if (nd->ni_vp->v_type != VREG) {
683 		error = ENOEXEC;
684 		goto out;
685 	}
686 #ifdef MAC
687 	error = mac_kld_check_load(td->td_ucred, nd->ni_vp);
688 	if (error) {
689 		goto out;
690 	}
691 #endif
692 
693 	/* Read the elf header from the file. */
694 	hdr = malloc(sizeof(*hdr), M_LINKER, M_WAITOK);
695 	error = vn_rdwr(UIO_READ, nd->ni_vp, (void *)hdr, sizeof(*hdr), 0,
696 	    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED,
697 	    &resid, td);
698 	if (error)
699 		goto out;
700 	if (resid != 0){
701 		error = ENOEXEC;
702 		goto out;
703 	}
704 
705 	if (!IS_ELF(*hdr)) {
706 		error = ENOEXEC;
707 		goto out;
708 	}
709 
710 	if (hdr->e_ident[EI_CLASS] != ELF_TARG_CLASS
711 	    || hdr->e_ident[EI_DATA] != ELF_TARG_DATA) {
712 		link_elf_error(filename, "Unsupported file layout");
713 		error = ENOEXEC;
714 		goto out;
715 	}
716 	if (hdr->e_ident[EI_VERSION] != EV_CURRENT
717 	    || hdr->e_version != EV_CURRENT) {
718 		link_elf_error(filename, "Unsupported file version");
719 		error = ENOEXEC;
720 		goto out;
721 	}
722 	if (hdr->e_type != ET_REL) {
723 		error = ENOSYS;
724 		goto out;
725 	}
726 	if (hdr->e_machine != ELF_TARG_MACH) {
727 		link_elf_error(filename, "Unsupported machine");
728 		error = ENOEXEC;
729 		goto out;
730 	}
731 
732 	lf = linker_make_file(filename, &link_elf_class);
733 	if (!lf) {
734 		error = ENOMEM;
735 		goto out;
736 	}
737 	ef = (elf_file_t) lf;
738 	ef->nprogtab = 0;
739 	ef->e_shdr = 0;
740 	ef->nreltab = 0;
741 	ef->nrelatab = 0;
742 
743 	/* Allocate and read in the section header */
744 	nbytes = hdr->e_shnum * hdr->e_shentsize;
745 	if (nbytes == 0 || hdr->e_shoff == 0 ||
746 	    hdr->e_shentsize != sizeof(Elf_Shdr)) {
747 		error = ENOEXEC;
748 		goto out;
749 	}
750 	shdr = malloc(nbytes, M_LINKER, M_WAITOK);
751 	ef->e_shdr = shdr;
752 	error = vn_rdwr(UIO_READ, nd->ni_vp, (caddr_t)shdr, nbytes,
753 	    hdr->e_shoff, UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred,
754 	    NOCRED, &resid, td);
755 	if (error)
756 		goto out;
757 	if (resid) {
758 		error = ENOEXEC;
759 		goto out;
760 	}
761 
762 	/* Scan the section header for information and table sizing. */
763 	nsym = 0;
764 	symtabindex = -1;
765 	symstrindex = -1;
766 	for (i = 0; i < hdr->e_shnum; i++) {
767 		if (shdr[i].sh_size == 0)
768 			continue;
769 		switch (shdr[i].sh_type) {
770 		case SHT_PROGBITS:
771 		case SHT_NOBITS:
772 #ifdef __amd64__
773 		case SHT_X86_64_UNWIND:
774 #endif
775 			if ((shdr[i].sh_flags & SHF_ALLOC) == 0)
776 				break;
777 			ef->nprogtab++;
778 			break;
779 		case SHT_SYMTAB:
780 			nsym++;
781 			symtabindex = i;
782 			symstrindex = shdr[i].sh_link;
783 			break;
784 		case SHT_REL:
785 			/*
786 			 * Ignore relocation tables for unallocated
787 			 * sections.
788 			 */
789 			if ((shdr[shdr[i].sh_info].sh_flags & SHF_ALLOC) == 0)
790 				break;
791 			ef->nreltab++;
792 			break;
793 		case SHT_RELA:
794 			if ((shdr[shdr[i].sh_info].sh_flags & SHF_ALLOC) == 0)
795 				break;
796 			ef->nrelatab++;
797 			break;
798 		case SHT_STRTAB:
799 			break;
800 		}
801 	}
802 	if (ef->nprogtab == 0) {
803 		link_elf_error(filename, "file has no contents");
804 		error = ENOEXEC;
805 		goto out;
806 	}
807 	if (nsym != 1) {
808 		/* Only allow one symbol table for now */
809 		link_elf_error(filename,
810 		    "file must have exactly one symbol table");
811 		error = ENOEXEC;
812 		goto out;
813 	}
814 	if (symstrindex < 0 || symstrindex > hdr->e_shnum ||
815 	    shdr[symstrindex].sh_type != SHT_STRTAB) {
816 		link_elf_error(filename, "file has invalid symbol strings");
817 		error = ENOEXEC;
818 		goto out;
819 	}
820 
821 	/* Allocate space for tracking the load chunks */
822 	if (ef->nprogtab != 0)
823 		ef->progtab = malloc(ef->nprogtab * sizeof(*ef->progtab),
824 		    M_LINKER, M_WAITOK | M_ZERO);
825 	if (ef->nreltab != 0)
826 		ef->reltab = malloc(ef->nreltab * sizeof(*ef->reltab),
827 		    M_LINKER, M_WAITOK | M_ZERO);
828 	if (ef->nrelatab != 0)
829 		ef->relatab = malloc(ef->nrelatab * sizeof(*ef->relatab),
830 		    M_LINKER, M_WAITOK | M_ZERO);
831 
832 	if (symtabindex == -1) {
833 		link_elf_error(filename, "lost symbol table index");
834 		error = ENOEXEC;
835 		goto out;
836 	}
837 	/* Allocate space for and load the symbol table */
838 	ef->ddbsymcnt = shdr[symtabindex].sh_size / sizeof(Elf_Sym);
839 	ef->ddbsymtab = malloc(shdr[symtabindex].sh_size, M_LINKER, M_WAITOK);
840 	error = vn_rdwr(UIO_READ, nd->ni_vp, (void *)ef->ddbsymtab,
841 	    shdr[symtabindex].sh_size, shdr[symtabindex].sh_offset,
842 	    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED,
843 	    &resid, td);
844 	if (error)
845 		goto out;
846 	if (resid != 0){
847 		error = EINVAL;
848 		goto out;
849 	}
850 
851 	/* Allocate space for and load the symbol strings */
852 	ef->ddbstrcnt = shdr[symstrindex].sh_size;
853 	ef->ddbstrtab = malloc(shdr[symstrindex].sh_size, M_LINKER, M_WAITOK);
854 	error = vn_rdwr(UIO_READ, nd->ni_vp, ef->ddbstrtab,
855 	    shdr[symstrindex].sh_size, shdr[symstrindex].sh_offset,
856 	    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED,
857 	    &resid, td);
858 	if (error)
859 		goto out;
860 	if (resid != 0){
861 		error = EINVAL;
862 		goto out;
863 	}
864 
865 	/* Do we have a string table for the section names?  */
866 	shstrindex = -1;
867 	if (hdr->e_shstrndx != 0 &&
868 	    shdr[hdr->e_shstrndx].sh_type == SHT_STRTAB) {
869 		shstrindex = hdr->e_shstrndx;
870 		ef->shstrcnt = shdr[shstrindex].sh_size;
871 		ef->shstrtab = malloc(shdr[shstrindex].sh_size, M_LINKER,
872 		    M_WAITOK);
873 		error = vn_rdwr(UIO_READ, nd->ni_vp, ef->shstrtab,
874 		    shdr[shstrindex].sh_size, shdr[shstrindex].sh_offset,
875 		    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED,
876 		    &resid, td);
877 		if (error)
878 			goto out;
879 		if (resid != 0){
880 			error = EINVAL;
881 			goto out;
882 		}
883 	}
884 
885 	/* Size up code/data(progbits) and bss(nobits). */
886 	alignmask = 0;
887 	for (i = 0; i < hdr->e_shnum; i++) {
888 		if (shdr[i].sh_size == 0)
889 			continue;
890 		switch (shdr[i].sh_type) {
891 		case SHT_PROGBITS:
892 		case SHT_NOBITS:
893 #ifdef __amd64__
894 		case SHT_X86_64_UNWIND:
895 #endif
896 			if ((shdr[i].sh_flags & SHF_ALLOC) == 0)
897 				break;
898 			alignmask = shdr[i].sh_addralign - 1;
899 			mapsize += alignmask;
900 			mapsize &= ~alignmask;
901 			mapsize += shdr[i].sh_size;
902 			break;
903 		}
904 	}
905 
906 	/*
907 	 * We know how much space we need for the text/data/bss/etc.
908 	 * This stuff needs to be in a single chunk so that profiling etc
909 	 * can get the bounds and gdb can associate offsets with modules
910 	 */
911 	ef->object = vm_pager_allocate(OBJT_PHYS, NULL, round_page(mapsize),
912 	    VM_PROT_ALL, 0, thread0.td_ucred);
913 	if (ef->object == NULL) {
914 		error = ENOMEM;
915 		goto out;
916 	}
917 #if VM_NRESERVLEVEL > 0
918 	vm_object_color(ef->object, 0);
919 #endif
920 
921 	/*
922 	 * In order to satisfy amd64's architectural requirements on the
923 	 * location of code and data in the kernel's address space, request a
924 	 * mapping that is above the kernel.
925 	 *
926 	 * Protections will be restricted once relocations are applied.
927 	 */
928 #ifdef __amd64__
929 	mapbase = KERNBASE;
930 #else
931 	mapbase = VM_MIN_KERNEL_ADDRESS;
932 #endif
933 	error = vm_map_find(kernel_map, ef->object, 0, &mapbase,
934 	    round_page(mapsize), 0, VMFS_OPTIMAL_SPACE, VM_PROT_ALL,
935 	    VM_PROT_ALL, 0);
936 	if (error != KERN_SUCCESS) {
937 		vm_object_deallocate(ef->object);
938 		ef->object = NULL;
939 		error = ENOMEM;
940 		goto out;
941 	}
942 
943 	/* Wire the pages */
944 	error = vm_map_wire(kernel_map, mapbase,
945 	    mapbase + round_page(mapsize),
946 	    VM_MAP_WIRE_SYSTEM|VM_MAP_WIRE_NOHOLES);
947 	if (error != KERN_SUCCESS) {
948 		error = ENOMEM;
949 		goto out;
950 	}
951 
952 	/* Inform the kld system about the situation */
953 	lf->address = ef->address = (caddr_t)mapbase;
954 	lf->size = mapsize;
955 
956 	/*
957 	 * Now load code/data(progbits), zero bss(nobits), allocate space for
958 	 * and load relocs
959 	 */
960 	pb = 0;
961 	rl = 0;
962 	ra = 0;
963 	alignmask = 0;
964 	for (i = 0; i < hdr->e_shnum; i++) {
965 		if (shdr[i].sh_size == 0)
966 			continue;
967 		switch (shdr[i].sh_type) {
968 		case SHT_PROGBITS:
969 		case SHT_NOBITS:
970 #ifdef __amd64__
971 		case SHT_X86_64_UNWIND:
972 #endif
973 			if ((shdr[i].sh_flags & SHF_ALLOC) == 0)
974 				break;
975 			alignmask = shdr[i].sh_addralign - 1;
976 			mapbase += alignmask;
977 			mapbase &= ~alignmask;
978 			if (ef->shstrtab != NULL && shdr[i].sh_name != 0) {
979 				ef->progtab[pb].name =
980 				    ef->shstrtab + shdr[i].sh_name;
981 				if (!strcmp(ef->progtab[pb].name, ".ctors")) {
982 					lf->ctors_addr = (caddr_t)mapbase;
983 					lf->ctors_size = shdr[i].sh_size;
984 				}
985 			} else if (shdr[i].sh_type == SHT_PROGBITS)
986 				ef->progtab[pb].name = "<<PROGBITS>>";
987 #ifdef __amd64__
988 			else if (shdr[i].sh_type == SHT_X86_64_UNWIND)
989 				ef->progtab[pb].name = "<<UNWIND>>";
990 #endif
991 			else
992 				ef->progtab[pb].name = "<<NOBITS>>";
993 			if (ef->progtab[pb].name != NULL &&
994 			    !strcmp(ef->progtab[pb].name, DPCPU_SETNAME)) {
995 				ef->progtab[pb].addr =
996 				    dpcpu_alloc(shdr[i].sh_size);
997 				if (ef->progtab[pb].addr == NULL) {
998 					printf("%s: pcpu module space is out "
999 					    "of space; cannot allocate %#jx "
1000 					    "for %s\n", __func__,
1001 					    (uintmax_t)shdr[i].sh_size,
1002 					    filename);
1003 				}
1004 			}
1005 #ifdef VIMAGE
1006 			else if (ef->progtab[pb].name != NULL &&
1007 			    !strcmp(ef->progtab[pb].name, VNET_SETNAME)) {
1008 				ef->progtab[pb].addr =
1009 				    vnet_data_alloc(shdr[i].sh_size);
1010 				if (ef->progtab[pb].addr == NULL) {
1011 					printf("%s: vnet module space is out "
1012 					    "of space; cannot allocate %#jx "
1013 					    "for %s\n", __func__,
1014 					    (uintmax_t)shdr[i].sh_size,
1015 					    filename);
1016 				}
1017 			}
1018 #endif
1019 			else
1020 				ef->progtab[pb].addr =
1021 				    (void *)(uintptr_t)mapbase;
1022 			if (ef->progtab[pb].addr == NULL) {
1023 				error = ENOSPC;
1024 				goto out;
1025 			}
1026 			ef->progtab[pb].size = shdr[i].sh_size;
1027 			ef->progtab[pb].flags = shdr[i].sh_flags;
1028 			ef->progtab[pb].sec = i;
1029 			if (shdr[i].sh_type == SHT_PROGBITS
1030 #ifdef __amd64__
1031 			    || shdr[i].sh_type == SHT_X86_64_UNWIND
1032 #endif
1033 			    ) {
1034 				error = vn_rdwr(UIO_READ, nd->ni_vp,
1035 				    ef->progtab[pb].addr,
1036 				    shdr[i].sh_size, shdr[i].sh_offset,
1037 				    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred,
1038 				    NOCRED, &resid, td);
1039 				if (error)
1040 					goto out;
1041 				if (resid != 0){
1042 					error = EINVAL;
1043 					goto out;
1044 				}
1045 				/* Initialize the per-cpu or vnet area. */
1046 				if (ef->progtab[pb].addr != (void *)mapbase &&
1047 				    !strcmp(ef->progtab[pb].name, DPCPU_SETNAME))
1048 					dpcpu_copy(ef->progtab[pb].addr,
1049 					    shdr[i].sh_size);
1050 #ifdef VIMAGE
1051 				else if (ef->progtab[pb].addr !=
1052 				    (void *)mapbase &&
1053 				    !strcmp(ef->progtab[pb].name, VNET_SETNAME))
1054 					vnet_data_copy(ef->progtab[pb].addr,
1055 					    shdr[i].sh_size);
1056 #endif
1057 			} else
1058 				bzero(ef->progtab[pb].addr, shdr[i].sh_size);
1059 
1060 			/* Update all symbol values with the offset. */
1061 			for (j = 0; j < ef->ddbsymcnt; j++) {
1062 				es = &ef->ddbsymtab[j];
1063 				if (es->st_shndx != i)
1064 					continue;
1065 				es->st_value += (Elf_Addr)ef->progtab[pb].addr;
1066 			}
1067 			mapbase += shdr[i].sh_size;
1068 			pb++;
1069 			break;
1070 		case SHT_REL:
1071 			if ((shdr[shdr[i].sh_info].sh_flags & SHF_ALLOC) == 0)
1072 				break;
1073 			ef->reltab[rl].rel = malloc(shdr[i].sh_size, M_LINKER,
1074 			    M_WAITOK);
1075 			ef->reltab[rl].nrel = shdr[i].sh_size / sizeof(Elf_Rel);
1076 			ef->reltab[rl].sec = shdr[i].sh_info;
1077 			error = vn_rdwr(UIO_READ, nd->ni_vp,
1078 			    (void *)ef->reltab[rl].rel,
1079 			    shdr[i].sh_size, shdr[i].sh_offset,
1080 			    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED,
1081 			    &resid, td);
1082 			if (error)
1083 				goto out;
1084 			if (resid != 0){
1085 				error = EINVAL;
1086 				goto out;
1087 			}
1088 			rl++;
1089 			break;
1090 		case SHT_RELA:
1091 			if ((shdr[shdr[i].sh_info].sh_flags & SHF_ALLOC) == 0)
1092 				break;
1093 			ef->relatab[ra].rela = malloc(shdr[i].sh_size, M_LINKER,
1094 			    M_WAITOK);
1095 			ef->relatab[ra].nrela =
1096 			    shdr[i].sh_size / sizeof(Elf_Rela);
1097 			ef->relatab[ra].sec = shdr[i].sh_info;
1098 			error = vn_rdwr(UIO_READ, nd->ni_vp,
1099 			    (void *)ef->relatab[ra].rela,
1100 			    shdr[i].sh_size, shdr[i].sh_offset,
1101 			    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED,
1102 			    &resid, td);
1103 			if (error)
1104 				goto out;
1105 			if (resid != 0){
1106 				error = EINVAL;
1107 				goto out;
1108 			}
1109 			ra++;
1110 			break;
1111 		}
1112 	}
1113 	if (pb != ef->nprogtab) {
1114 		link_elf_error(filename, "lost progbits");
1115 		error = ENOEXEC;
1116 		goto out;
1117 	}
1118 	if (rl != ef->nreltab) {
1119 		link_elf_error(filename, "lost reltab");
1120 		error = ENOEXEC;
1121 		goto out;
1122 	}
1123 	if (ra != ef->nrelatab) {
1124 		link_elf_error(filename, "lost relatab");
1125 		error = ENOEXEC;
1126 		goto out;
1127 	}
1128 	if (mapbase != (vm_offset_t)ef->address + mapsize) {
1129 		printf(
1130 		    "%s: mapbase 0x%lx != address %p + mapsize 0x%lx (0x%lx)\n",
1131 		    filename != NULL ? filename : "<none>",
1132 		    (u_long)mapbase, ef->address, (u_long)mapsize,
1133 		    (u_long)(vm_offset_t)ef->address + mapsize);
1134 		error = ENOMEM;
1135 		goto out;
1136 	}
1137 
1138 	/* Local intra-module relocations */
1139 	error = link_elf_reloc_local(lf, false);
1140 	if (error != 0)
1141 		goto out;
1142 
1143 	/* Pull in dependencies */
1144 	VOP_UNLOCK(nd->ni_vp);
1145 	error = linker_load_dependencies(lf);
1146 	vn_lock(nd->ni_vp, LK_EXCLUSIVE | LK_RETRY);
1147 	if (error)
1148 		goto out;
1149 
1150 	/* External relocations */
1151 	error = relocate_file(ef);
1152 	if (error)
1153 		goto out;
1154 
1155 	/* Notify MD code that a module is being loaded. */
1156 	error = elf_cpu_load_file(lf);
1157 	if (error)
1158 		goto out;
1159 
1160 #if defined(__i386__) || defined(__amd64__)
1161 	/* Now ifuncs. */
1162 	error = link_elf_reloc_local(lf, true);
1163 	if (error != 0)
1164 		goto out;
1165 #endif
1166 
1167 	link_elf_protect(ef);
1168 	link_elf_invoke_ctors(lf->ctors_addr, lf->ctors_size);
1169 	*result = lf;
1170 
1171 out:
1172 	VOP_UNLOCK(nd->ni_vp);
1173 	vn_close(nd->ni_vp, FREAD, td->td_ucred, td);
1174 	free(nd, M_TEMP);
1175 	if (error && lf)
1176 		linker_file_unload(lf, LINKER_UNLOAD_FORCE);
1177 	free(hdr, M_LINKER);
1178 
1179 	return error;
1180 }
1181 
1182 static void
1183 link_elf_unload_file(linker_file_t file)
1184 {
1185 	elf_file_t ef = (elf_file_t) file;
1186 	u_int i;
1187 
1188 	/* Notify MD code that a module is being unloaded. */
1189 	elf_cpu_unload_file(file);
1190 
1191 	if (ef->progtab) {
1192 		for (i = 0; i < ef->nprogtab; i++) {
1193 			if (ef->progtab[i].size == 0)
1194 				continue;
1195 			if (ef->progtab[i].name == NULL)
1196 				continue;
1197 			if (!strcmp(ef->progtab[i].name, DPCPU_SETNAME))
1198 				dpcpu_free(ef->progtab[i].addr,
1199 				    ef->progtab[i].size);
1200 #ifdef VIMAGE
1201 			else if (!strcmp(ef->progtab[i].name, VNET_SETNAME))
1202 				vnet_data_free(ef->progtab[i].addr,
1203 				    ef->progtab[i].size);
1204 #endif
1205 		}
1206 	}
1207 	if (ef->preloaded) {
1208 		free(ef->reltab, M_LINKER);
1209 		free(ef->relatab, M_LINKER);
1210 		free(ef->progtab, M_LINKER);
1211 		free(ef->ctftab, M_LINKER);
1212 		free(ef->ctfoff, M_LINKER);
1213 		free(ef->typoff, M_LINKER);
1214 		if (file->pathname != NULL)
1215 			preload_delete_name(file->pathname);
1216 		return;
1217 	}
1218 
1219 	for (i = 0; i < ef->nreltab; i++)
1220 		free(ef->reltab[i].rel, M_LINKER);
1221 	for (i = 0; i < ef->nrelatab; i++)
1222 		free(ef->relatab[i].rela, M_LINKER);
1223 	free(ef->reltab, M_LINKER);
1224 	free(ef->relatab, M_LINKER);
1225 	free(ef->progtab, M_LINKER);
1226 
1227 	if (ef->object != NULL)
1228 		vm_map_remove(kernel_map, (vm_offset_t)ef->address,
1229 		    (vm_offset_t)ef->address + ptoa(ef->object->size));
1230 	free(ef->e_shdr, M_LINKER);
1231 	free(ef->ddbsymtab, M_LINKER);
1232 	free(ef->ddbstrtab, M_LINKER);
1233 	free(ef->shstrtab, M_LINKER);
1234 	free(ef->ctftab, M_LINKER);
1235 	free(ef->ctfoff, M_LINKER);
1236 	free(ef->typoff, M_LINKER);
1237 }
1238 
1239 static const char *
1240 symbol_name(elf_file_t ef, Elf_Size r_info)
1241 {
1242 	const Elf_Sym *ref;
1243 
1244 	if (ELF_R_SYM(r_info)) {
1245 		ref = ef->ddbsymtab + ELF_R_SYM(r_info);
1246 		return ef->ddbstrtab + ref->st_name;
1247 	} else
1248 		return NULL;
1249 }
1250 
1251 static Elf_Addr
1252 findbase(elf_file_t ef, int sec)
1253 {
1254 	int i;
1255 	Elf_Addr base = 0;
1256 
1257 	for (i = 0; i < ef->nprogtab; i++) {
1258 		if (sec == ef->progtab[i].sec) {
1259 			base = (Elf_Addr)ef->progtab[i].addr;
1260 			break;
1261 		}
1262 	}
1263 	return base;
1264 }
1265 
1266 static int
1267 relocate_file(elf_file_t ef)
1268 {
1269 	const Elf_Rel *rellim;
1270 	const Elf_Rel *rel;
1271 	const Elf_Rela *relalim;
1272 	const Elf_Rela *rela;
1273 	const char *symname;
1274 	const Elf_Sym *sym;
1275 	int i;
1276 	Elf_Size symidx;
1277 	Elf_Addr base;
1278 
1279 	/* Perform relocations without addend if there are any: */
1280 	for (i = 0; i < ef->nreltab; i++) {
1281 		rel = ef->reltab[i].rel;
1282 		if (rel == NULL) {
1283 			link_elf_error(ef->lf.filename, "lost a reltab!");
1284 			return (ENOEXEC);
1285 		}
1286 		rellim = rel + ef->reltab[i].nrel;
1287 		base = findbase(ef, ef->reltab[i].sec);
1288 		if (base == 0) {
1289 			link_elf_error(ef->lf.filename, "lost base for reltab");
1290 			return (ENOEXEC);
1291 		}
1292 		for ( ; rel < rellim; rel++) {
1293 			symidx = ELF_R_SYM(rel->r_info);
1294 			if (symidx >= ef->ddbsymcnt)
1295 				continue;
1296 			sym = ef->ddbsymtab + symidx;
1297 			/* Local relocs are already done */
1298 			if (ELF_ST_BIND(sym->st_info) == STB_LOCAL)
1299 				continue;
1300 			if (elf_reloc(&ef->lf, base, rel, ELF_RELOC_REL,
1301 			    elf_obj_lookup)) {
1302 				symname = symbol_name(ef, rel->r_info);
1303 				printf("link_elf_obj: symbol %s undefined\n",
1304 				    symname);
1305 				return (ENOENT);
1306 			}
1307 		}
1308 	}
1309 
1310 	/* Perform relocations with addend if there are any: */
1311 	for (i = 0; i < ef->nrelatab; i++) {
1312 		rela = ef->relatab[i].rela;
1313 		if (rela == NULL) {
1314 			link_elf_error(ef->lf.filename, "lost a relatab!");
1315 			return (ENOEXEC);
1316 		}
1317 		relalim = rela + ef->relatab[i].nrela;
1318 		base = findbase(ef, ef->relatab[i].sec);
1319 		if (base == 0) {
1320 			link_elf_error(ef->lf.filename,
1321 			    "lost base for relatab");
1322 			return (ENOEXEC);
1323 		}
1324 		for ( ; rela < relalim; rela++) {
1325 			symidx = ELF_R_SYM(rela->r_info);
1326 			if (symidx >= ef->ddbsymcnt)
1327 				continue;
1328 			sym = ef->ddbsymtab + symidx;
1329 			/* Local relocs are already done */
1330 			if (ELF_ST_BIND(sym->st_info) == STB_LOCAL)
1331 				continue;
1332 			if (elf_reloc(&ef->lf, base, rela, ELF_RELOC_RELA,
1333 			    elf_obj_lookup)) {
1334 				symname = symbol_name(ef, rela->r_info);
1335 				printf("link_elf_obj: symbol %s undefined\n",
1336 				    symname);
1337 				return (ENOENT);
1338 			}
1339 		}
1340 	}
1341 
1342 	/*
1343 	 * Only clean SHN_FBSD_CACHED for successful return.  If we
1344 	 * modified symbol table for the object but found an
1345 	 * unresolved symbol, there is no reason to roll back.
1346 	 */
1347 	elf_obj_cleanup_globals_cache(ef);
1348 
1349 	return (0);
1350 }
1351 
1352 static int
1353 link_elf_lookup_symbol(linker_file_t lf, const char *name, c_linker_sym_t *sym)
1354 {
1355 	elf_file_t ef = (elf_file_t) lf;
1356 	const Elf_Sym *symp;
1357 	const char *strp;
1358 	int i;
1359 
1360 	for (i = 0, symp = ef->ddbsymtab; i < ef->ddbsymcnt; i++, symp++) {
1361 		strp = ef->ddbstrtab + symp->st_name;
1362 		if (symp->st_shndx != SHN_UNDEF && strcmp(name, strp) == 0) {
1363 			*sym = (c_linker_sym_t) symp;
1364 			return 0;
1365 		}
1366 	}
1367 	return ENOENT;
1368 }
1369 
1370 static int
1371 link_elf_symbol_values(linker_file_t lf, c_linker_sym_t sym,
1372     linker_symval_t *symval)
1373 {
1374 	elf_file_t ef;
1375 	const Elf_Sym *es;
1376 	caddr_t val;
1377 
1378 	ef = (elf_file_t) lf;
1379 	es = (const Elf_Sym*) sym;
1380 	val = (caddr_t)es->st_value;
1381 	if (es >= ef->ddbsymtab && es < (ef->ddbsymtab + ef->ddbsymcnt)) {
1382 		symval->name = ef->ddbstrtab + es->st_name;
1383 		val = (caddr_t)es->st_value;
1384 		if (ELF_ST_TYPE(es->st_info) == STT_GNU_IFUNC)
1385 			val = ((caddr_t (*)(void))val)();
1386 		symval->value = val;
1387 		symval->size = es->st_size;
1388 		return 0;
1389 	}
1390 	return ENOENT;
1391 }
1392 
1393 static int
1394 link_elf_search_symbol(linker_file_t lf, caddr_t value,
1395     c_linker_sym_t *sym, long *diffp)
1396 {
1397 	elf_file_t ef = (elf_file_t) lf;
1398 	u_long off = (uintptr_t) (void *) value;
1399 	u_long diff = off;
1400 	u_long st_value;
1401 	const Elf_Sym *es;
1402 	const Elf_Sym *best = NULL;
1403 	int i;
1404 
1405 	for (i = 0, es = ef->ddbsymtab; i < ef->ddbsymcnt; i++, es++) {
1406 		if (es->st_name == 0)
1407 			continue;
1408 		st_value = es->st_value;
1409 		if (off >= st_value) {
1410 			if (off - st_value < diff) {
1411 				diff = off - st_value;
1412 				best = es;
1413 				if (diff == 0)
1414 					break;
1415 			} else if (off - st_value == diff) {
1416 				best = es;
1417 			}
1418 		}
1419 	}
1420 	if (best == NULL)
1421 		*diffp = off;
1422 	else
1423 		*diffp = diff;
1424 	*sym = (c_linker_sym_t) best;
1425 
1426 	return 0;
1427 }
1428 
1429 /*
1430  * Look up a linker set on an ELF system.
1431  */
1432 static int
1433 link_elf_lookup_set(linker_file_t lf, const char *name,
1434     void ***startp, void ***stopp, int *countp)
1435 {
1436 	elf_file_t ef = (elf_file_t)lf;
1437 	void **start, **stop;
1438 	int i, count;
1439 
1440 	/* Relative to section number */
1441 	for (i = 0; i < ef->nprogtab; i++) {
1442 		if ((strncmp(ef->progtab[i].name, "set_", 4) == 0) &&
1443 		    strcmp(ef->progtab[i].name + 4, name) == 0) {
1444 			start  = (void **)ef->progtab[i].addr;
1445 			stop = (void **)((char *)ef->progtab[i].addr +
1446 			    ef->progtab[i].size);
1447 			count = stop - start;
1448 			if (startp)
1449 				*startp = start;
1450 			if (stopp)
1451 				*stopp = stop;
1452 			if (countp)
1453 				*countp = count;
1454 			return (0);
1455 		}
1456 	}
1457 	return (ESRCH);
1458 }
1459 
1460 static int
1461 link_elf_each_function_name(linker_file_t file,
1462     int (*callback)(const char *, void *), void *opaque)
1463 {
1464 	elf_file_t ef = (elf_file_t)file;
1465 	const Elf_Sym *symp;
1466 	int i, error;
1467 
1468 	/* Exhaustive search */
1469 	for (i = 0, symp = ef->ddbsymtab; i < ef->ddbsymcnt; i++, symp++) {
1470 		if (symp->st_value != 0 &&
1471 		    (ELF_ST_TYPE(symp->st_info) == STT_FUNC ||
1472 		    ELF_ST_TYPE(symp->st_info) == STT_GNU_IFUNC)) {
1473 			error = callback(ef->ddbstrtab + symp->st_name, opaque);
1474 			if (error)
1475 				return (error);
1476 		}
1477 	}
1478 	return (0);
1479 }
1480 
1481 static int
1482 link_elf_each_function_nameval(linker_file_t file,
1483     linker_function_nameval_callback_t callback, void *opaque)
1484 {
1485 	linker_symval_t symval;
1486 	elf_file_t ef = (elf_file_t)file;
1487 	const Elf_Sym* symp;
1488 	int i, error;
1489 
1490 	/* Exhaustive search */
1491 	for (i = 0, symp = ef->ddbsymtab; i < ef->ddbsymcnt; i++, symp++) {
1492 		if (symp->st_value != 0 &&
1493 		    (ELF_ST_TYPE(symp->st_info) == STT_FUNC ||
1494 		    ELF_ST_TYPE(symp->st_info) == STT_GNU_IFUNC)) {
1495 			error = link_elf_symbol_values(file,
1496 			    (c_linker_sym_t)symp, &symval);
1497 			if (error)
1498 				return (error);
1499 			error = callback(file, i, &symval, opaque);
1500 			if (error)
1501 				return (error);
1502 		}
1503 	}
1504 	return (0);
1505 }
1506 
1507 static void
1508 elf_obj_cleanup_globals_cache(elf_file_t ef)
1509 {
1510 	Elf_Sym *sym;
1511 	Elf_Size i;
1512 
1513 	for (i = 0; i < ef->ddbsymcnt; i++) {
1514 		sym = ef->ddbsymtab + i;
1515 		if (sym->st_shndx == SHN_FBSD_CACHED) {
1516 			sym->st_shndx = SHN_UNDEF;
1517 			sym->st_value = 0;
1518 		}
1519 	}
1520 }
1521 
1522 /*
1523  * Symbol lookup function that can be used when the symbol index is known (ie
1524  * in relocations). It uses the symbol index instead of doing a fully fledged
1525  * hash table based lookup when such is valid. For example for local symbols.
1526  * This is not only more efficient, it's also more correct. It's not always
1527  * the case that the symbol can be found through the hash table.
1528  */
1529 static int
1530 elf_obj_lookup(linker_file_t lf, Elf_Size symidx, int deps, Elf_Addr *res)
1531 {
1532 	elf_file_t ef = (elf_file_t)lf;
1533 	Elf_Sym *sym;
1534 	const char *symbol;
1535 	Elf_Addr res1;
1536 
1537 	/* Don't even try to lookup the symbol if the index is bogus. */
1538 	if (symidx >= ef->ddbsymcnt) {
1539 		*res = 0;
1540 		return (EINVAL);
1541 	}
1542 
1543 	sym = ef->ddbsymtab + symidx;
1544 
1545 	/* Quick answer if there is a definition included. */
1546 	if (sym->st_shndx != SHN_UNDEF) {
1547 		res1 = (Elf_Addr)sym->st_value;
1548 		if (ELF_ST_TYPE(sym->st_info) == STT_GNU_IFUNC)
1549 			res1 = ((Elf_Addr (*)(void))res1)();
1550 		*res = res1;
1551 		return (0);
1552 	}
1553 
1554 	/* If we get here, then it is undefined and needs a lookup. */
1555 	switch (ELF_ST_BIND(sym->st_info)) {
1556 	case STB_LOCAL:
1557 		/* Local, but undefined? huh? */
1558 		*res = 0;
1559 		return (EINVAL);
1560 
1561 	case STB_GLOBAL:
1562 	case STB_WEAK:
1563 		/* Relative to Data or Function name */
1564 		symbol = ef->ddbstrtab + sym->st_name;
1565 
1566 		/* Force a lookup failure if the symbol name is bogus. */
1567 		if (*symbol == 0) {
1568 			*res = 0;
1569 			return (EINVAL);
1570 		}
1571 		res1 = (Elf_Addr)linker_file_lookup_symbol(lf, symbol, deps);
1572 
1573 		/*
1574 		 * Cache global lookups during module relocation. The failure
1575 		 * case is particularly expensive for callers, who must scan
1576 		 * through the entire globals table doing strcmp(). Cache to
1577 		 * avoid doing such work repeatedly.
1578 		 *
1579 		 * After relocation is complete, undefined globals will be
1580 		 * restored to SHN_UNDEF in elf_obj_cleanup_globals_cache(),
1581 		 * above.
1582 		 */
1583 		if (res1 != 0) {
1584 			sym->st_shndx = SHN_FBSD_CACHED;
1585 			sym->st_value = res1;
1586 			*res = res1;
1587 			return (0);
1588 		} else if (ELF_ST_BIND(sym->st_info) == STB_WEAK) {
1589 			sym->st_value = 0;
1590 			*res = 0;
1591 			return (0);
1592 		}
1593 		return (EINVAL);
1594 
1595 	default:
1596 		return (EINVAL);
1597 	}
1598 }
1599 
1600 static void
1601 link_elf_fix_link_set(elf_file_t ef)
1602 {
1603 	static const char startn[] = "__start_";
1604 	static const char stopn[] = "__stop_";
1605 	Elf_Sym *sym;
1606 	const char *sym_name, *linkset_name;
1607 	Elf_Addr startp, stopp;
1608 	Elf_Size symidx;
1609 	int start, i;
1610 
1611 	startp = stopp = 0;
1612 	for (symidx = 1 /* zero entry is special */;
1613 		symidx < ef->ddbsymcnt; symidx++) {
1614 		sym = ef->ddbsymtab + symidx;
1615 		if (sym->st_shndx != SHN_UNDEF)
1616 			continue;
1617 
1618 		sym_name = ef->ddbstrtab + sym->st_name;
1619 		if (strncmp(sym_name, startn, sizeof(startn) - 1) == 0) {
1620 			start = 1;
1621 			linkset_name = sym_name + sizeof(startn) - 1;
1622 		}
1623 		else if (strncmp(sym_name, stopn, sizeof(stopn) - 1) == 0) {
1624 			start = 0;
1625 			linkset_name = sym_name + sizeof(stopn) - 1;
1626 		}
1627 		else
1628 			continue;
1629 
1630 		for (i = 0; i < ef->nprogtab; i++) {
1631 			if (strcmp(ef->progtab[i].name, linkset_name) == 0) {
1632 				startp = (Elf_Addr)ef->progtab[i].addr;
1633 				stopp = (Elf_Addr)(startp + ef->progtab[i].size);
1634 				break;
1635 			}
1636 		}
1637 		if (i == ef->nprogtab)
1638 			continue;
1639 
1640 		sym->st_value = start ? startp : stopp;
1641 		sym->st_shndx = i;
1642 	}
1643 }
1644 
1645 static int
1646 link_elf_reloc_local(linker_file_t lf, bool ifuncs)
1647 {
1648 	elf_file_t ef = (elf_file_t)lf;
1649 	const Elf_Rel *rellim;
1650 	const Elf_Rel *rel;
1651 	const Elf_Rela *relalim;
1652 	const Elf_Rela *rela;
1653 	const Elf_Sym *sym;
1654 	Elf_Addr base;
1655 	int i;
1656 	Elf_Size symidx;
1657 
1658 	link_elf_fix_link_set(ef);
1659 
1660 	/* Perform relocations without addend if there are any: */
1661 	for (i = 0; i < ef->nreltab; i++) {
1662 		rel = ef->reltab[i].rel;
1663 		if (rel == NULL) {
1664 			link_elf_error(ef->lf.filename, "lost a reltab");
1665 			return (ENOEXEC);
1666 		}
1667 		rellim = rel + ef->reltab[i].nrel;
1668 		base = findbase(ef, ef->reltab[i].sec);
1669 		if (base == 0) {
1670 			link_elf_error(ef->lf.filename, "lost base for reltab");
1671 			return (ENOEXEC);
1672 		}
1673 		for ( ; rel < rellim; rel++) {
1674 			symidx = ELF_R_SYM(rel->r_info);
1675 			if (symidx >= ef->ddbsymcnt)
1676 				continue;
1677 			sym = ef->ddbsymtab + symidx;
1678 			/* Only do local relocs */
1679 			if (ELF_ST_BIND(sym->st_info) != STB_LOCAL)
1680 				continue;
1681 			if ((ELF_ST_TYPE(sym->st_info) == STT_GNU_IFUNC ||
1682 			    elf_is_ifunc_reloc(rel->r_info)) != ifuncs)
1683 				continue;
1684 			if (elf_reloc_local(lf, base, rel, ELF_RELOC_REL,
1685 			    elf_obj_lookup) != 0)
1686 				return (ENOEXEC);
1687 		}
1688 	}
1689 
1690 	/* Perform relocations with addend if there are any: */
1691 	for (i = 0; i < ef->nrelatab; i++) {
1692 		rela = ef->relatab[i].rela;
1693 		if (rela == NULL) {
1694 			link_elf_error(ef->lf.filename, "lost a relatab!");
1695 			return (ENOEXEC);
1696 		}
1697 		relalim = rela + ef->relatab[i].nrela;
1698 		base = findbase(ef, ef->relatab[i].sec);
1699 		if (base == 0) {
1700 			link_elf_error(ef->lf.filename, "lost base for reltab");
1701 			return (ENOEXEC);
1702 		}
1703 		for ( ; rela < relalim; rela++) {
1704 			symidx = ELF_R_SYM(rela->r_info);
1705 			if (symidx >= ef->ddbsymcnt)
1706 				continue;
1707 			sym = ef->ddbsymtab + symidx;
1708 			/* Only do local relocs */
1709 			if (ELF_ST_BIND(sym->st_info) != STB_LOCAL)
1710 				continue;
1711 			if ((ELF_ST_TYPE(sym->st_info) == STT_GNU_IFUNC ||
1712 			    elf_is_ifunc_reloc(rela->r_info)) != ifuncs)
1713 				continue;
1714 			if (elf_reloc_local(lf, base, rela, ELF_RELOC_RELA,
1715 			    elf_obj_lookup) != 0)
1716 				return (ENOEXEC);
1717 		}
1718 	}
1719 	return (0);
1720 }
1721 
1722 static long
1723 link_elf_symtab_get(linker_file_t lf, const Elf_Sym **symtab)
1724 {
1725     elf_file_t ef = (elf_file_t)lf;
1726 
1727     *symtab = ef->ddbsymtab;
1728 
1729     if (*symtab == NULL)
1730         return (0);
1731 
1732     return (ef->ddbsymcnt);
1733 }
1734 
1735 static long
1736 link_elf_strtab_get(linker_file_t lf, caddr_t *strtab)
1737 {
1738     elf_file_t ef = (elf_file_t)lf;
1739 
1740     *strtab = ef->ddbstrtab;
1741 
1742     if (*strtab == NULL)
1743         return (0);
1744 
1745     return (ef->ddbstrcnt);
1746 }
1747