xref: /freebsd/sys/kern/link_elf_obj.c (revision 7791ecf04b48a0c365b003447f479ec890115dfc)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 1998-2000 Doug Rabson
5  * Copyright (c) 2004 Peter Wemm
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include "opt_ddb.h"
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/fcntl.h>
38 #include <sys/kernel.h>
39 #include <sys/lock.h>
40 #include <sys/malloc.h>
41 #include <sys/linker.h>
42 #include <sys/mutex.h>
43 #include <sys/mount.h>
44 #include <sys/namei.h>
45 #include <sys/proc.h>
46 #include <sys/rwlock.h>
47 #include <sys/vnode.h>
48 
49 #include <machine/elf.h>
50 
51 #include <net/vnet.h>
52 
53 #include <security/mac/mac_framework.h>
54 
55 #include <vm/vm.h>
56 #include <vm/vm_param.h>
57 #include <vm/pmap.h>
58 #include <vm/vm_extern.h>
59 #include <vm/vm_kern.h>
60 #include <vm/vm_map.h>
61 #include <vm/vm_object.h>
62 #include <vm/vm_page.h>
63 #include <vm/vm_pager.h>
64 
65 #include <sys/link_elf.h>
66 
67 #ifdef DDB_CTF
68 #include <contrib/zlib/zlib.h>
69 #endif
70 
71 #include "linker_if.h"
72 
/*
 * One entry per section that occupies memory in the loaded module.
 * Entries are filled in by the load/preload paths from the section
 * header table.
 */
typedef struct {
	void		*addr;	/* Where the section contents live in memory. */
	Elf_Off		size;	/* Section size, copied from sh_size. */
	int		flags;	/* Section flags. */
	int		sec;	/* Original section number. */
	char		*name;	/* Section name or a "<<...>>" placeholder. */
} Elf_progent;
80 
/* One SHT_REL relocation table together with the section it targets. */
typedef struct {
	Elf_Rel		*rel;	/* Relocation entries. */
	int		nrel;	/* Entry count: sh_size / sizeof(Elf_Rel). */
	int		sec;	/* Target section index, taken from sh_info. */
} Elf_relent;
86 
/* One SHT_RELA relocation table together with the section it targets. */
typedef struct {
	Elf_Rela	*rela;	/* Relocation entries (with addends). */
	int		nrela;	/* Entry count: sh_size / sizeof(Elf_Rela). */
	int		sec;	/* Target section index, taken from sh_info. */
} Elf_relaent;
92 
/*
 * Per-file state for an ELF relocatable object handled by this linker
 * class.  The generic linker_file must remain the first member: the code
 * in this file converts between linker_file_t and elf_file_t with plain
 * casts.
 */
typedef struct elf_file {
	struct linker_file lf;		/* Common fields */

	int		preloaded;	/* Set to 1 for loader-provided files */
	caddr_t		address;	/* Relocation address */
	vm_object_t	object;		/* VM object to hold file pages */
	Elf_Shdr	*e_shdr;	/* Section header table */

	Elf_progent	*progtab;	/* Loaded sections */
	u_int		nprogtab;	/* Number of progtab entries */

	Elf_relaent	*relatab;	/* SHT_RELA tables */
	u_int		nrelatab;	/* Number of relatab entries */

	Elf_relent	*reltab;	/* SHT_REL tables */
	int		nreltab;	/* Number of reltab entries */

	Elf_Sym		*ddbsymtab;	/* The symbol table we are using */
	long		ddbsymcnt;	/* Number of symbols */
	caddr_t		ddbstrtab;	/* String table */
	long		ddbstrcnt;	/* number of bytes in string table */

	caddr_t		shstrtab;	/* Section name string table */
	long		shstrcnt;	/* number of bytes in string table */

	caddr_t		ctftab;		/* CTF table */
	long		ctfcnt;		/* number of bytes in CTF table */
	caddr_t		ctfoff;		/* CTF offset table */
	caddr_t		typoff;		/* Type offset table */
	long		typlen;		/* Number of type entries. */

} *elf_file_t;
125 
126 #include <kern/kern_ctf.c>
127 
/*
 * Forward declarations.  Most of these functions are installed as linker
 * class methods through link_elf_methods[]; link_elf_reloc_local() and
 * elf_obj_lookup() are file-local helpers.
 */
static int	link_elf_link_preload(linker_class_t cls,
		    const char *, linker_file_t *);
static int	link_elf_link_preload_finish(linker_file_t);
static int	link_elf_load_file(linker_class_t, const char *, linker_file_t *);
static int	link_elf_lookup_symbol(linker_file_t, const char *,
		    c_linker_sym_t *);
static int	link_elf_symbol_values(linker_file_t, c_linker_sym_t,
		    linker_symval_t *);
static int	link_elf_search_symbol(linker_file_t, caddr_t value,
		    c_linker_sym_t *sym, long *diffp);

static void	link_elf_unload_file(linker_file_t);
static int	link_elf_lookup_set(linker_file_t, const char *,
		    void ***, void ***, int *);
static int	link_elf_each_function_name(linker_file_t,
		    int (*)(const char *, void *), void *);
static int	link_elf_each_function_nameval(linker_file_t,
				linker_function_nameval_callback_t,
				void *);
static int	link_elf_reloc_local(linker_file_t, bool);
static long	link_elf_symtab_get(linker_file_t, const Elf_Sym **);
static long	link_elf_strtab_get(linker_file_t, caddr_t *);

static int	elf_obj_lookup(linker_file_t lf, Elf_Size symidx, int deps,
		    Elf_Addr *);

/*
 * kobj method table binding the generic linker interface to the
 * implementations in this file.  link_elf_ctf_get is not declared here;
 * presumably it is provided by the kern_ctf.c include above (confirm).
 */
static kobj_method_t link_elf_methods[] = {
	KOBJMETHOD(linker_lookup_symbol,	link_elf_lookup_symbol),
	KOBJMETHOD(linker_symbol_values,	link_elf_symbol_values),
	KOBJMETHOD(linker_search_symbol,	link_elf_search_symbol),
	KOBJMETHOD(linker_unload,		link_elf_unload_file),
	KOBJMETHOD(linker_load_file,		link_elf_load_file),
	KOBJMETHOD(linker_link_preload,		link_elf_link_preload),
	KOBJMETHOD(linker_link_preload_finish,	link_elf_link_preload_finish),
	KOBJMETHOD(linker_lookup_set,		link_elf_lookup_set),
	KOBJMETHOD(linker_each_function_name,	link_elf_each_function_name),
	KOBJMETHOD(linker_each_function_nameval, link_elf_each_function_nameval),
	KOBJMETHOD(linker_ctf_get,		link_elf_ctf_get),
	KOBJMETHOD(linker_symtab_get, 		link_elf_symtab_get),
	KOBJMETHOD(linker_strtab_get, 		link_elf_strtab_get),
	KOBJMETHOD_END
};
170 
/*
 * Linker class descriptor: a class name chosen by the target ELF class
 * (32- or 64-bit), the method table, and the size of the per-file object
 * (struct elf_file).
 */
static struct linker_class link_elf_class = {
#if ELF_TARG_CLASS == ELFCLASS32
	"elf32_obj",
#else
	"elf64_obj",
#endif
	link_elf_methods, sizeof(struct elf_file)
};
179 
/* Forward declarations of further file-local helpers. */
static int	relocate_file(elf_file_t ef);
static void	elf_obj_cleanup_globals_cache(elf_file_t);
182 
/*
 * Print a "kldload:" diagnostic through printf().  The file name is part
 * of the message only when the caller supplies one.
 *
 * filename: name of the offending file, or NULL when none is known.
 * s:        message text.
 */
static void
link_elf_error(const char *filename, const char *s)
{
	if (filename != NULL) {
		printf("kldload: %s: %s\n", filename, s);
		return;
	}
	printf("kldload: %s\n", s);
}
191 
/*
 * SYSINIT hook: register the ELF object-file linker class with the kernel
 * linker.  The argument is not used.
 */
static void
link_elf_init(void *arg)
{

	linker_add_class(&link_elf_class);
}
/* Schedule link_elf_init() at SI_SUB_KLD, order SI_ORDER_SECOND. */
SYSINIT(link_elf_obj, SI_SUB_KLD, SI_ORDER_SECOND, link_elf_init, NULL);
199 
200 static void
201 link_elf_protect_range(elf_file_t ef, vm_offset_t start, vm_offset_t end,
202     vm_prot_t prot)
203 {
204 	int error __unused;
205 
206 	KASSERT(start <= end && start >= (vm_offset_t)ef->address &&
207 	    end <= round_page((vm_offset_t)ef->address + ef->lf.size),
208 	    ("link_elf_protect_range: invalid range %#jx-%#jx",
209 	    (uintmax_t)start, (uintmax_t)end));
210 
211 	if (start == end)
212 		return;
213 	if (ef->preloaded) {
214 #ifdef __amd64__
215 		error = pmap_change_prot(start, end - start, prot);
216 		KASSERT(error == 0,
217 		    ("link_elf_protect_range: pmap_change_prot() returned %d",
218 		    error));
219 #endif
220 		return;
221 	}
222 	error = vm_map_protect(kernel_map, start, end, prot, 0,
223 	    VM_MAP_PROTECT_SET_PROT);
224 	KASSERT(error == KERN_SUCCESS,
225 	    ("link_elf_protect_range: vm_map_protect() returned %d", error));
226 }
227 
228 /*
229  * Restrict permissions on linker file memory based on section flags.
230  * Sections need not be page-aligned, so overlap within a page is possible.
231  */
232 static void
233 link_elf_protect(elf_file_t ef)
234 {
235 	vm_offset_t end, segend, segstart, start;
236 	vm_prot_t gapprot, prot, segprot;
237 	int i;
238 
239 	/*
240 	 * If the file was preloaded, the last page may contain other preloaded
241 	 * data which may need to be writeable.  ELF files are always
242 	 * page-aligned, but other preloaded data, such as entropy or CPU
243 	 * microcode may be loaded with a smaller alignment.
244 	 */
245 	gapprot = ef->preloaded ? VM_PROT_RW : VM_PROT_READ;
246 
247 	start = end = (vm_offset_t)ef->address;
248 	prot = VM_PROT_READ;
249 	for (i = 0; i < ef->nprogtab; i++) {
250 		/*
251 		 * VNET and DPCPU sections have their memory allocated by their
252 		 * respective subsystems.
253 		 */
254 		if (ef->progtab[i].name != NULL && (
255 #ifdef VIMAGE
256 		    strcmp(ef->progtab[i].name, VNET_SETNAME) == 0 ||
257 #endif
258 		    strcmp(ef->progtab[i].name, DPCPU_SETNAME) == 0))
259 			continue;
260 
261 		segstart = trunc_page((vm_offset_t)ef->progtab[i].addr);
262 		segend = round_page((vm_offset_t)ef->progtab[i].addr +
263 		    ef->progtab[i].size);
264 		segprot = VM_PROT_READ;
265 		if ((ef->progtab[i].flags & SHF_WRITE) != 0)
266 			segprot |= VM_PROT_WRITE;
267 		if ((ef->progtab[i].flags & SHF_EXECINSTR) != 0)
268 			segprot |= VM_PROT_EXECUTE;
269 
270 		if (end <= segstart) {
271 			/*
272 			 * Case 1: there is no overlap between the previous
273 			 * segment and this one.  Apply protections to the
274 			 * previous segment, and protect the gap between the
275 			 * previous and current segments, if any.
276 			 */
277 			link_elf_protect_range(ef, start, end, prot);
278 			link_elf_protect_range(ef, end, segstart, gapprot);
279 
280 			start = segstart;
281 			end = segend;
282 			prot = segprot;
283 		} else if (start < segstart && end == segend) {
284 			/*
285 			 * Case 2: the current segment is a subrange of the
286 			 * previous segment.  Apply protections to the
287 			 * non-overlapping portion of the previous segment.
288 			 */
289 			link_elf_protect_range(ef, start, segstart, prot);
290 
291 			start = segstart;
292 			prot |= segprot;
293 		} else if (end < segend) {
294 			/*
295 			 * Case 3: there is partial overlap between the previous
296 			 * and current segments.  Apply protections to the
297 			 * non-overlapping portion of the previous segment, and
298 			 * then the overlap, which must use the union of the two
299 			 * segments' protections.
300 			 */
301 			link_elf_protect_range(ef, start, segstart, prot);
302 			link_elf_protect_range(ef, segstart, end,
303 			    prot | segprot);
304 			start = end;
305 			end = segend;
306 			prot = segprot;
307 		} else {
308 			/*
309 			 * Case 4: the two segments reside in the same page.
310 			 */
311 			prot |= segprot;
312 		}
313 	}
314 
315 	/*
316 	 * Fix up the last unprotected segment and trailing data.
317 	 */
318 	link_elf_protect_range(ef, start, end, prot);
319 	link_elf_protect_range(ef, end,
320 	    round_page((vm_offset_t)ef->address + ef->lf.size), gapprot);
321 }
322 
323 static int
324 link_elf_link_preload(linker_class_t cls, const char *filename,
325     linker_file_t *result)
326 {
327 	Elf_Ehdr *hdr;
328 	Elf_Shdr *shdr;
329 	Elf_Sym *es;
330 	void *modptr, *baseptr, *sizeptr;
331 	char *type;
332 	elf_file_t ef;
333 	linker_file_t lf;
334 	Elf_Addr off;
335 	int error, i, j, pb, ra, rl, shstrindex, symstrindex, symtabindex;
336 
337 	/* Look to see if we have the file preloaded */
338 	modptr = preload_search_by_name(filename);
339 	if (modptr == NULL)
340 		return ENOENT;
341 
342 	type = (char *)preload_search_info(modptr, MODINFO_TYPE);
343 	baseptr = preload_search_info(modptr, MODINFO_ADDR);
344 	sizeptr = preload_search_info(modptr, MODINFO_SIZE);
345 	hdr = (Elf_Ehdr *)preload_search_info(modptr, MODINFO_METADATA |
346 	    MODINFOMD_ELFHDR);
347 	shdr = (Elf_Shdr *)preload_search_info(modptr, MODINFO_METADATA |
348 	    MODINFOMD_SHDR);
349 	if (type == NULL || (strcmp(type, "elf" __XSTRING(__ELF_WORD_SIZE)
350 	    " obj module") != 0 &&
351 	    strcmp(type, "elf obj module") != 0)) {
352 		return (EFTYPE);
353 	}
354 	if (baseptr == NULL || sizeptr == NULL || hdr == NULL ||
355 	    shdr == NULL)
356 		return (EINVAL);
357 
358 	lf = linker_make_file(filename, &link_elf_class);
359 	if (lf == NULL)
360 		return (ENOMEM);
361 
362 	ef = (elf_file_t)lf;
363 	ef->preloaded = 1;
364 	ef->address = *(caddr_t *)baseptr;
365 	lf->address = *(caddr_t *)baseptr;
366 	lf->size = *(size_t *)sizeptr;
367 
368 	if (hdr->e_ident[EI_CLASS] != ELF_TARG_CLASS ||
369 	    hdr->e_ident[EI_DATA] != ELF_TARG_DATA ||
370 	    hdr->e_ident[EI_VERSION] != EV_CURRENT ||
371 	    hdr->e_version != EV_CURRENT ||
372 	    hdr->e_type != ET_REL ||
373 	    hdr->e_machine != ELF_TARG_MACH) {
374 		error = EFTYPE;
375 		goto out;
376 	}
377 	ef->e_shdr = shdr;
378 
379 	/* Scan the section header for information and table sizing. */
380 	symtabindex = -1;
381 	symstrindex = -1;
382 	for (i = 0; i < hdr->e_shnum; i++) {
383 		switch (shdr[i].sh_type) {
384 		case SHT_PROGBITS:
385 		case SHT_NOBITS:
386 #ifdef __amd64__
387 		case SHT_X86_64_UNWIND:
388 #endif
389 			/* Ignore sections not loaded by the loader. */
390 			if (shdr[i].sh_addr == 0)
391 				break;
392 			ef->nprogtab++;
393 			break;
394 		case SHT_SYMTAB:
395 			symtabindex = i;
396 			symstrindex = shdr[i].sh_link;
397 			break;
398 		case SHT_REL:
399 			/*
400 			 * Ignore relocation tables for sections not
401 			 * loaded by the loader.
402 			 */
403 			if (shdr[shdr[i].sh_info].sh_addr == 0)
404 				break;
405 			ef->nreltab++;
406 			break;
407 		case SHT_RELA:
408 			if (shdr[shdr[i].sh_info].sh_addr == 0)
409 				break;
410 			ef->nrelatab++;
411 			break;
412 		}
413 	}
414 
415 	shstrindex = hdr->e_shstrndx;
416 	if (ef->nprogtab == 0 || symstrindex < 0 ||
417 	    symstrindex >= hdr->e_shnum ||
418 	    shdr[symstrindex].sh_type != SHT_STRTAB || shstrindex == 0 ||
419 	    shstrindex >= hdr->e_shnum ||
420 	    shdr[shstrindex].sh_type != SHT_STRTAB) {
421 		printf("%s: bad/missing section headers\n", filename);
422 		error = ENOEXEC;
423 		goto out;
424 	}
425 
426 	/* Allocate space for tracking the load chunks */
427 	if (ef->nprogtab != 0)
428 		ef->progtab = malloc(ef->nprogtab * sizeof(*ef->progtab),
429 		    M_LINKER, M_WAITOK | M_ZERO);
430 	if (ef->nreltab != 0)
431 		ef->reltab = malloc(ef->nreltab * sizeof(*ef->reltab),
432 		    M_LINKER, M_WAITOK | M_ZERO);
433 	if (ef->nrelatab != 0)
434 		ef->relatab = malloc(ef->nrelatab * sizeof(*ef->relatab),
435 		    M_LINKER, M_WAITOK | M_ZERO);
436 	if ((ef->nprogtab != 0 && ef->progtab == NULL) ||
437 	    (ef->nreltab != 0 && ef->reltab == NULL) ||
438 	    (ef->nrelatab != 0 && ef->relatab == NULL)) {
439 		error = ENOMEM;
440 		goto out;
441 	}
442 
443 	/* XXX, relocate the sh_addr fields saved by the loader. */
444 	off = 0;
445 	for (i = 0; i < hdr->e_shnum; i++) {
446 		if (shdr[i].sh_addr != 0 && (off == 0 || shdr[i].sh_addr < off))
447 			off = shdr[i].sh_addr;
448 	}
449 	for (i = 0; i < hdr->e_shnum; i++) {
450 		if (shdr[i].sh_addr != 0)
451 			shdr[i].sh_addr = shdr[i].sh_addr - off +
452 			    (Elf_Addr)ef->address;
453 	}
454 
455 	ef->ddbsymcnt = shdr[symtabindex].sh_size / sizeof(Elf_Sym);
456 	ef->ddbsymtab = (Elf_Sym *)shdr[symtabindex].sh_addr;
457 	ef->ddbstrcnt = shdr[symstrindex].sh_size;
458 	ef->ddbstrtab = (char *)shdr[symstrindex].sh_addr;
459 	ef->shstrcnt = shdr[shstrindex].sh_size;
460 	ef->shstrtab = (char *)shdr[shstrindex].sh_addr;
461 
462 	/* Now fill out progtab and the relocation tables. */
463 	pb = 0;
464 	rl = 0;
465 	ra = 0;
466 	for (i = 0; i < hdr->e_shnum; i++) {
467 		switch (shdr[i].sh_type) {
468 		case SHT_PROGBITS:
469 		case SHT_NOBITS:
470 #ifdef __amd64__
471 		case SHT_X86_64_UNWIND:
472 #endif
473 			if (shdr[i].sh_addr == 0)
474 				break;
475 			ef->progtab[pb].addr = (void *)shdr[i].sh_addr;
476 			if (shdr[i].sh_type == SHT_PROGBITS)
477 				ef->progtab[pb].name = "<<PROGBITS>>";
478 #ifdef __amd64__
479 			else if (shdr[i].sh_type == SHT_X86_64_UNWIND)
480 				ef->progtab[pb].name = "<<UNWIND>>";
481 #endif
482 			else
483 				ef->progtab[pb].name = "<<NOBITS>>";
484 			ef->progtab[pb].size = shdr[i].sh_size;
485 			ef->progtab[pb].flags = shdr[i].sh_flags;
486 			ef->progtab[pb].sec = i;
487 			if (ef->shstrtab && shdr[i].sh_name != 0)
488 				ef->progtab[pb].name =
489 				    ef->shstrtab + shdr[i].sh_name;
490 			if (ef->progtab[pb].name != NULL &&
491 			    !strcmp(ef->progtab[pb].name, DPCPU_SETNAME)) {
492 				void *dpcpu;
493 
494 				dpcpu = dpcpu_alloc(shdr[i].sh_size);
495 				if (dpcpu == NULL) {
496 					printf("%s: pcpu module space is out "
497 					    "of space; cannot allocate %#jx "
498 					    "for %s\n", __func__,
499 					    (uintmax_t)shdr[i].sh_size,
500 					    filename);
501 					error = ENOSPC;
502 					goto out;
503 				}
504 				memcpy(dpcpu, ef->progtab[pb].addr,
505 				    ef->progtab[pb].size);
506 				dpcpu_copy(dpcpu, shdr[i].sh_size);
507 				ef->progtab[pb].addr = dpcpu;
508 #ifdef VIMAGE
509 			} else if (ef->progtab[pb].name != NULL &&
510 			    !strcmp(ef->progtab[pb].name, VNET_SETNAME)) {
511 				void *vnet_data;
512 
513 				vnet_data = vnet_data_alloc(shdr[i].sh_size);
514 				if (vnet_data == NULL) {
515 					printf("%s: vnet module space is out "
516 					    "of space; cannot allocate %#jx "
517 					    "for %s\n", __func__,
518 					    (uintmax_t)shdr[i].sh_size,
519 					    filename);
520 					error = ENOSPC;
521 					goto out;
522 				}
523 				memcpy(vnet_data, ef->progtab[pb].addr,
524 				    ef->progtab[pb].size);
525 				vnet_data_copy(vnet_data, shdr[i].sh_size);
526 				ef->progtab[pb].addr = vnet_data;
527 #endif
528 			} else if (ef->progtab[pb].name != NULL &&
529 			    !strcmp(ef->progtab[pb].name, ".ctors")) {
530 				lf->ctors_addr = ef->progtab[pb].addr;
531 				lf->ctors_size = shdr[i].sh_size;
532 			}
533 
534 			/* Update all symbol values with the offset. */
535 			for (j = 0; j < ef->ddbsymcnt; j++) {
536 				es = &ef->ddbsymtab[j];
537 				if (es->st_shndx != i)
538 					continue;
539 				es->st_value += (Elf_Addr)ef->progtab[pb].addr;
540 			}
541 			pb++;
542 			break;
543 		case SHT_REL:
544 			if (shdr[shdr[i].sh_info].sh_addr == 0)
545 				break;
546 			ef->reltab[rl].rel = (Elf_Rel *)shdr[i].sh_addr;
547 			ef->reltab[rl].nrel = shdr[i].sh_size / sizeof(Elf_Rel);
548 			ef->reltab[rl].sec = shdr[i].sh_info;
549 			rl++;
550 			break;
551 		case SHT_RELA:
552 			if (shdr[shdr[i].sh_info].sh_addr == 0)
553 				break;
554 			ef->relatab[ra].rela = (Elf_Rela *)shdr[i].sh_addr;
555 			ef->relatab[ra].nrela =
556 			    shdr[i].sh_size / sizeof(Elf_Rela);
557 			ef->relatab[ra].sec = shdr[i].sh_info;
558 			ra++;
559 			break;
560 		}
561 	}
562 	if (pb != ef->nprogtab) {
563 		printf("%s: lost progbits\n", filename);
564 		error = ENOEXEC;
565 		goto out;
566 	}
567 	if (rl != ef->nreltab) {
568 		printf("%s: lost reltab\n", filename);
569 		error = ENOEXEC;
570 		goto out;
571 	}
572 	if (ra != ef->nrelatab) {
573 		printf("%s: lost relatab\n", filename);
574 		error = ENOEXEC;
575 		goto out;
576 	}
577 
578 	/*
579 	 * The file needs to be writeable and executable while applying
580 	 * relocations.  Mapping protections are applied once relocation
581 	 * processing is complete.
582 	 */
583 	link_elf_protect_range(ef, (vm_offset_t)ef->address,
584 	    round_page((vm_offset_t)ef->address + ef->lf.size), VM_PROT_ALL);
585 
586 	/* Local intra-module relocations */
587 	error = link_elf_reloc_local(lf, false);
588 	if (error != 0)
589 		goto out;
590 	*result = lf;
591 	return (0);
592 
593 out:
594 	/* preload not done this way */
595 	linker_file_unload(lf, LINKER_UNLOAD_FORCE);
596 	return (error);
597 }
598 
599 static void
600 link_elf_invoke_ctors(caddr_t addr, size_t size)
601 {
602 	void (**ctor)(void);
603 	size_t i, cnt;
604 
605 	if (addr == NULL || size == 0)
606 		return;
607 	cnt = size / sizeof(*ctor);
608 	ctor = (void *)addr;
609 	for (i = 0; i < cnt; i++) {
610 		if (ctor[i] != NULL)
611 			(*ctor[i])();
612 	}
613 }
614 
615 static int
616 link_elf_link_preload_finish(linker_file_t lf)
617 {
618 	elf_file_t ef;
619 	int error;
620 
621 	ef = (elf_file_t)lf;
622 	error = relocate_file(ef);
623 	if (error)
624 		return (error);
625 
626 	/* Notify MD code that a module is being loaded. */
627 	error = elf_cpu_load_file(lf);
628 	if (error)
629 		return (error);
630 
631 #if defined(__i386__) || defined(__amd64__)
632 	/* Now ifuncs. */
633 	error = link_elf_reloc_local(lf, true);
634 	if (error != 0)
635 		return (error);
636 #endif
637 
638 	/* Apply protections now that relocation processing is complete. */
639 	link_elf_protect(ef);
640 
641 	link_elf_invoke_ctors(lf->ctors_addr, lf->ctors_size);
642 	return (0);
643 }
644 
645 static int
646 link_elf_load_file(linker_class_t cls, const char *filename,
647     linker_file_t *result)
648 {
649 	struct nameidata *nd;
650 	struct thread *td = curthread;	/* XXX */
651 	Elf_Ehdr *hdr;
652 	Elf_Shdr *shdr;
653 	Elf_Sym *es;
654 	int nbytes, i, j;
655 	vm_offset_t mapbase;
656 	size_t mapsize;
657 	int error = 0;
658 	ssize_t resid;
659 	int flags;
660 	elf_file_t ef;
661 	linker_file_t lf;
662 	int symtabindex;
663 	int symstrindex;
664 	int shstrindex;
665 	int nsym;
666 	int pb, rl, ra;
667 	int alignmask;
668 
669 	shdr = NULL;
670 	lf = NULL;
671 	mapsize = 0;
672 	hdr = NULL;
673 
674 	nd = malloc(sizeof(struct nameidata), M_TEMP, M_WAITOK);
675 	NDINIT(nd, LOOKUP, FOLLOW, UIO_SYSSPACE, filename, td);
676 	flags = FREAD;
677 	error = vn_open(nd, &flags, 0, NULL);
678 	if (error) {
679 		free(nd, M_TEMP);
680 		return error;
681 	}
682 	NDFREE(nd, NDF_ONLY_PNBUF);
683 	if (nd->ni_vp->v_type != VREG) {
684 		error = ENOEXEC;
685 		goto out;
686 	}
687 #ifdef MAC
688 	error = mac_kld_check_load(td->td_ucred, nd->ni_vp);
689 	if (error) {
690 		goto out;
691 	}
692 #endif
693 
694 	/* Read the elf header from the file. */
695 	hdr = malloc(sizeof(*hdr), M_LINKER, M_WAITOK);
696 	error = vn_rdwr(UIO_READ, nd->ni_vp, (void *)hdr, sizeof(*hdr), 0,
697 	    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED,
698 	    &resid, td);
699 	if (error)
700 		goto out;
701 	if (resid != 0){
702 		error = ENOEXEC;
703 		goto out;
704 	}
705 
706 	if (!IS_ELF(*hdr)) {
707 		error = ENOEXEC;
708 		goto out;
709 	}
710 
711 	if (hdr->e_ident[EI_CLASS] != ELF_TARG_CLASS
712 	    || hdr->e_ident[EI_DATA] != ELF_TARG_DATA) {
713 		link_elf_error(filename, "Unsupported file layout");
714 		error = ENOEXEC;
715 		goto out;
716 	}
717 	if (hdr->e_ident[EI_VERSION] != EV_CURRENT
718 	    || hdr->e_version != EV_CURRENT) {
719 		link_elf_error(filename, "Unsupported file version");
720 		error = ENOEXEC;
721 		goto out;
722 	}
723 	if (hdr->e_type != ET_REL) {
724 		error = ENOSYS;
725 		goto out;
726 	}
727 	if (hdr->e_machine != ELF_TARG_MACH) {
728 		link_elf_error(filename, "Unsupported machine");
729 		error = ENOEXEC;
730 		goto out;
731 	}
732 
733 	lf = linker_make_file(filename, &link_elf_class);
734 	if (!lf) {
735 		error = ENOMEM;
736 		goto out;
737 	}
738 	ef = (elf_file_t) lf;
739 	ef->nprogtab = 0;
740 	ef->e_shdr = 0;
741 	ef->nreltab = 0;
742 	ef->nrelatab = 0;
743 
744 	/* Allocate and read in the section header */
745 	nbytes = hdr->e_shnum * hdr->e_shentsize;
746 	if (nbytes == 0 || hdr->e_shoff == 0 ||
747 	    hdr->e_shentsize != sizeof(Elf_Shdr)) {
748 		error = ENOEXEC;
749 		goto out;
750 	}
751 	shdr = malloc(nbytes, M_LINKER, M_WAITOK);
752 	ef->e_shdr = shdr;
753 	error = vn_rdwr(UIO_READ, nd->ni_vp, (caddr_t)shdr, nbytes,
754 	    hdr->e_shoff, UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred,
755 	    NOCRED, &resid, td);
756 	if (error)
757 		goto out;
758 	if (resid) {
759 		error = ENOEXEC;
760 		goto out;
761 	}
762 
763 	/* Scan the section header for information and table sizing. */
764 	nsym = 0;
765 	symtabindex = -1;
766 	symstrindex = -1;
767 	for (i = 0; i < hdr->e_shnum; i++) {
768 		if (shdr[i].sh_size == 0)
769 			continue;
770 		switch (shdr[i].sh_type) {
771 		case SHT_PROGBITS:
772 		case SHT_NOBITS:
773 #ifdef __amd64__
774 		case SHT_X86_64_UNWIND:
775 #endif
776 			if ((shdr[i].sh_flags & SHF_ALLOC) == 0)
777 				break;
778 			ef->nprogtab++;
779 			break;
780 		case SHT_SYMTAB:
781 			nsym++;
782 			symtabindex = i;
783 			symstrindex = shdr[i].sh_link;
784 			break;
785 		case SHT_REL:
786 			/*
787 			 * Ignore relocation tables for unallocated
788 			 * sections.
789 			 */
790 			if ((shdr[shdr[i].sh_info].sh_flags & SHF_ALLOC) == 0)
791 				break;
792 			ef->nreltab++;
793 			break;
794 		case SHT_RELA:
795 			if ((shdr[shdr[i].sh_info].sh_flags & SHF_ALLOC) == 0)
796 				break;
797 			ef->nrelatab++;
798 			break;
799 		case SHT_STRTAB:
800 			break;
801 		}
802 	}
803 	if (ef->nprogtab == 0) {
804 		link_elf_error(filename, "file has no contents");
805 		error = ENOEXEC;
806 		goto out;
807 	}
808 	if (nsym != 1) {
809 		/* Only allow one symbol table for now */
810 		link_elf_error(filename,
811 		    "file must have exactly one symbol table");
812 		error = ENOEXEC;
813 		goto out;
814 	}
815 	if (symstrindex < 0 || symstrindex > hdr->e_shnum ||
816 	    shdr[symstrindex].sh_type != SHT_STRTAB) {
817 		link_elf_error(filename, "file has invalid symbol strings");
818 		error = ENOEXEC;
819 		goto out;
820 	}
821 
822 	/* Allocate space for tracking the load chunks */
823 	if (ef->nprogtab != 0)
824 		ef->progtab = malloc(ef->nprogtab * sizeof(*ef->progtab),
825 		    M_LINKER, M_WAITOK | M_ZERO);
826 	if (ef->nreltab != 0)
827 		ef->reltab = malloc(ef->nreltab * sizeof(*ef->reltab),
828 		    M_LINKER, M_WAITOK | M_ZERO);
829 	if (ef->nrelatab != 0)
830 		ef->relatab = malloc(ef->nrelatab * sizeof(*ef->relatab),
831 		    M_LINKER, M_WAITOK | M_ZERO);
832 
833 	if (symtabindex == -1) {
834 		link_elf_error(filename, "lost symbol table index");
835 		error = ENOEXEC;
836 		goto out;
837 	}
838 	/* Allocate space for and load the symbol table */
839 	ef->ddbsymcnt = shdr[symtabindex].sh_size / sizeof(Elf_Sym);
840 	ef->ddbsymtab = malloc(shdr[symtabindex].sh_size, M_LINKER, M_WAITOK);
841 	error = vn_rdwr(UIO_READ, nd->ni_vp, (void *)ef->ddbsymtab,
842 	    shdr[symtabindex].sh_size, shdr[symtabindex].sh_offset,
843 	    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED,
844 	    &resid, td);
845 	if (error)
846 		goto out;
847 	if (resid != 0){
848 		error = EINVAL;
849 		goto out;
850 	}
851 
852 	/* Allocate space for and load the symbol strings */
853 	ef->ddbstrcnt = shdr[symstrindex].sh_size;
854 	ef->ddbstrtab = malloc(shdr[symstrindex].sh_size, M_LINKER, M_WAITOK);
855 	error = vn_rdwr(UIO_READ, nd->ni_vp, ef->ddbstrtab,
856 	    shdr[symstrindex].sh_size, shdr[symstrindex].sh_offset,
857 	    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED,
858 	    &resid, td);
859 	if (error)
860 		goto out;
861 	if (resid != 0){
862 		error = EINVAL;
863 		goto out;
864 	}
865 
866 	/* Do we have a string table for the section names?  */
867 	shstrindex = -1;
868 	if (hdr->e_shstrndx != 0 &&
869 	    shdr[hdr->e_shstrndx].sh_type == SHT_STRTAB) {
870 		shstrindex = hdr->e_shstrndx;
871 		ef->shstrcnt = shdr[shstrindex].sh_size;
872 		ef->shstrtab = malloc(shdr[shstrindex].sh_size, M_LINKER,
873 		    M_WAITOK);
874 		error = vn_rdwr(UIO_READ, nd->ni_vp, ef->shstrtab,
875 		    shdr[shstrindex].sh_size, shdr[shstrindex].sh_offset,
876 		    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED,
877 		    &resid, td);
878 		if (error)
879 			goto out;
880 		if (resid != 0){
881 			error = EINVAL;
882 			goto out;
883 		}
884 	}
885 
886 	/* Size up code/data(progbits) and bss(nobits). */
887 	alignmask = 0;
888 	for (i = 0; i < hdr->e_shnum; i++) {
889 		if (shdr[i].sh_size == 0)
890 			continue;
891 		switch (shdr[i].sh_type) {
892 		case SHT_PROGBITS:
893 		case SHT_NOBITS:
894 #ifdef __amd64__
895 		case SHT_X86_64_UNWIND:
896 #endif
897 			if ((shdr[i].sh_flags & SHF_ALLOC) == 0)
898 				break;
899 			alignmask = shdr[i].sh_addralign - 1;
900 			mapsize += alignmask;
901 			mapsize &= ~alignmask;
902 			mapsize += shdr[i].sh_size;
903 			break;
904 		}
905 	}
906 
907 	/*
908 	 * We know how much space we need for the text/data/bss/etc.
909 	 * This stuff needs to be in a single chunk so that profiling etc
910 	 * can get the bounds and gdb can associate offsets with modules
911 	 */
912 	ef->object = vm_pager_allocate(OBJT_PHYS, NULL, round_page(mapsize),
913 	    VM_PROT_ALL, 0, thread0.td_ucred);
914 	if (ef->object == NULL) {
915 		error = ENOMEM;
916 		goto out;
917 	}
918 #if VM_NRESERVLEVEL > 0
919 	vm_object_color(ef->object, 0);
920 #endif
921 
922 	/*
923 	 * In order to satisfy amd64's architectural requirements on the
924 	 * location of code and data in the kernel's address space, request a
925 	 * mapping that is above the kernel.
926 	 *
927 	 * Protections will be restricted once relocations are applied.
928 	 */
929 #ifdef __amd64__
930 	mapbase = KERNBASE;
931 #else
932 	mapbase = VM_MIN_KERNEL_ADDRESS;
933 #endif
934 	error = vm_map_find(kernel_map, ef->object, 0, &mapbase,
935 	    round_page(mapsize), 0, VMFS_OPTIMAL_SPACE, VM_PROT_ALL,
936 	    VM_PROT_ALL, 0);
937 	if (error != KERN_SUCCESS) {
938 		vm_object_deallocate(ef->object);
939 		ef->object = NULL;
940 		error = ENOMEM;
941 		goto out;
942 	}
943 
944 	/* Wire the pages */
945 	error = vm_map_wire(kernel_map, mapbase,
946 	    mapbase + round_page(mapsize),
947 	    VM_MAP_WIRE_SYSTEM|VM_MAP_WIRE_NOHOLES);
948 	if (error != KERN_SUCCESS) {
949 		error = ENOMEM;
950 		goto out;
951 	}
952 
953 	/* Inform the kld system about the situation */
954 	lf->address = ef->address = (caddr_t)mapbase;
955 	lf->size = mapsize;
956 
957 	/*
958 	 * Now load code/data(progbits), zero bss(nobits), allocate space for
959 	 * and load relocs
960 	 */
961 	pb = 0;
962 	rl = 0;
963 	ra = 0;
964 	alignmask = 0;
965 	for (i = 0; i < hdr->e_shnum; i++) {
966 		if (shdr[i].sh_size == 0)
967 			continue;
968 		switch (shdr[i].sh_type) {
969 		case SHT_PROGBITS:
970 		case SHT_NOBITS:
971 #ifdef __amd64__
972 		case SHT_X86_64_UNWIND:
973 #endif
974 			if ((shdr[i].sh_flags & SHF_ALLOC) == 0)
975 				break;
976 			alignmask = shdr[i].sh_addralign - 1;
977 			mapbase += alignmask;
978 			mapbase &= ~alignmask;
979 			if (ef->shstrtab != NULL && shdr[i].sh_name != 0) {
980 				ef->progtab[pb].name =
981 				    ef->shstrtab + shdr[i].sh_name;
982 				if (!strcmp(ef->progtab[pb].name, ".ctors")) {
983 					lf->ctors_addr = (caddr_t)mapbase;
984 					lf->ctors_size = shdr[i].sh_size;
985 				}
986 			} else if (shdr[i].sh_type == SHT_PROGBITS)
987 				ef->progtab[pb].name = "<<PROGBITS>>";
988 #ifdef __amd64__
989 			else if (shdr[i].sh_type == SHT_X86_64_UNWIND)
990 				ef->progtab[pb].name = "<<UNWIND>>";
991 #endif
992 			else
993 				ef->progtab[pb].name = "<<NOBITS>>";
994 			if (ef->progtab[pb].name != NULL &&
995 			    !strcmp(ef->progtab[pb].name, DPCPU_SETNAME)) {
996 				ef->progtab[pb].addr =
997 				    dpcpu_alloc(shdr[i].sh_size);
998 				if (ef->progtab[pb].addr == NULL) {
999 					printf("%s: pcpu module space is out "
1000 					    "of space; cannot allocate %#jx "
1001 					    "for %s\n", __func__,
1002 					    (uintmax_t)shdr[i].sh_size,
1003 					    filename);
1004 				}
1005 			}
1006 #ifdef VIMAGE
1007 			else if (ef->progtab[pb].name != NULL &&
1008 			    !strcmp(ef->progtab[pb].name, VNET_SETNAME)) {
1009 				ef->progtab[pb].addr =
1010 				    vnet_data_alloc(shdr[i].sh_size);
1011 				if (ef->progtab[pb].addr == NULL) {
1012 					printf("%s: vnet module space is out "
1013 					    "of space; cannot allocate %#jx "
1014 					    "for %s\n", __func__,
1015 					    (uintmax_t)shdr[i].sh_size,
1016 					    filename);
1017 				}
1018 			}
1019 #endif
1020 			else
1021 				ef->progtab[pb].addr =
1022 				    (void *)(uintptr_t)mapbase;
1023 			if (ef->progtab[pb].addr == NULL) {
1024 				error = ENOSPC;
1025 				goto out;
1026 			}
1027 			ef->progtab[pb].size = shdr[i].sh_size;
1028 			ef->progtab[pb].flags = shdr[i].sh_flags;
1029 			ef->progtab[pb].sec = i;
1030 			if (shdr[i].sh_type == SHT_PROGBITS
1031 #ifdef __amd64__
1032 			    || shdr[i].sh_type == SHT_X86_64_UNWIND
1033 #endif
1034 			    ) {
1035 				error = vn_rdwr(UIO_READ, nd->ni_vp,
1036 				    ef->progtab[pb].addr,
1037 				    shdr[i].sh_size, shdr[i].sh_offset,
1038 				    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred,
1039 				    NOCRED, &resid, td);
1040 				if (error)
1041 					goto out;
1042 				if (resid != 0){
1043 					error = EINVAL;
1044 					goto out;
1045 				}
1046 				/* Initialize the per-cpu or vnet area. */
1047 				if (ef->progtab[pb].addr != (void *)mapbase &&
1048 				    !strcmp(ef->progtab[pb].name, DPCPU_SETNAME))
1049 					dpcpu_copy(ef->progtab[pb].addr,
1050 					    shdr[i].sh_size);
1051 #ifdef VIMAGE
1052 				else if (ef->progtab[pb].addr !=
1053 				    (void *)mapbase &&
1054 				    !strcmp(ef->progtab[pb].name, VNET_SETNAME))
1055 					vnet_data_copy(ef->progtab[pb].addr,
1056 					    shdr[i].sh_size);
1057 #endif
1058 			} else
1059 				bzero(ef->progtab[pb].addr, shdr[i].sh_size);
1060 
1061 			/* Update all symbol values with the offset. */
1062 			for (j = 0; j < ef->ddbsymcnt; j++) {
1063 				es = &ef->ddbsymtab[j];
1064 				if (es->st_shndx != i)
1065 					continue;
1066 				es->st_value += (Elf_Addr)ef->progtab[pb].addr;
1067 			}
1068 			mapbase += shdr[i].sh_size;
1069 			pb++;
1070 			break;
1071 		case SHT_REL:
1072 			if ((shdr[shdr[i].sh_info].sh_flags & SHF_ALLOC) == 0)
1073 				break;
1074 			ef->reltab[rl].rel = malloc(shdr[i].sh_size, M_LINKER,
1075 			    M_WAITOK);
1076 			ef->reltab[rl].nrel = shdr[i].sh_size / sizeof(Elf_Rel);
1077 			ef->reltab[rl].sec = shdr[i].sh_info;
1078 			error = vn_rdwr(UIO_READ, nd->ni_vp,
1079 			    (void *)ef->reltab[rl].rel,
1080 			    shdr[i].sh_size, shdr[i].sh_offset,
1081 			    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED,
1082 			    &resid, td);
1083 			if (error)
1084 				goto out;
1085 			if (resid != 0){
1086 				error = EINVAL;
1087 				goto out;
1088 			}
1089 			rl++;
1090 			break;
1091 		case SHT_RELA:
1092 			if ((shdr[shdr[i].sh_info].sh_flags & SHF_ALLOC) == 0)
1093 				break;
1094 			ef->relatab[ra].rela = malloc(shdr[i].sh_size, M_LINKER,
1095 			    M_WAITOK);
1096 			ef->relatab[ra].nrela =
1097 			    shdr[i].sh_size / sizeof(Elf_Rela);
1098 			ef->relatab[ra].sec = shdr[i].sh_info;
1099 			error = vn_rdwr(UIO_READ, nd->ni_vp,
1100 			    (void *)ef->relatab[ra].rela,
1101 			    shdr[i].sh_size, shdr[i].sh_offset,
1102 			    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED,
1103 			    &resid, td);
1104 			if (error)
1105 				goto out;
1106 			if (resid != 0){
1107 				error = EINVAL;
1108 				goto out;
1109 			}
1110 			ra++;
1111 			break;
1112 		}
1113 	}
1114 	if (pb != ef->nprogtab) {
1115 		link_elf_error(filename, "lost progbits");
1116 		error = ENOEXEC;
1117 		goto out;
1118 	}
1119 	if (rl != ef->nreltab) {
1120 		link_elf_error(filename, "lost reltab");
1121 		error = ENOEXEC;
1122 		goto out;
1123 	}
1124 	if (ra != ef->nrelatab) {
1125 		link_elf_error(filename, "lost relatab");
1126 		error = ENOEXEC;
1127 		goto out;
1128 	}
1129 	if (mapbase != (vm_offset_t)ef->address + mapsize) {
1130 		printf(
1131 		    "%s: mapbase 0x%lx != address %p + mapsize 0x%lx (0x%lx)\n",
1132 		    filename != NULL ? filename : "<none>",
1133 		    (u_long)mapbase, ef->address, (u_long)mapsize,
1134 		    (u_long)(vm_offset_t)ef->address + mapsize);
1135 		error = ENOMEM;
1136 		goto out;
1137 	}
1138 
1139 	/* Local intra-module relocations */
1140 	error = link_elf_reloc_local(lf, false);
1141 	if (error != 0)
1142 		goto out;
1143 
1144 	/* Pull in dependencies */
1145 	VOP_UNLOCK(nd->ni_vp);
1146 	error = linker_load_dependencies(lf);
1147 	vn_lock(nd->ni_vp, LK_EXCLUSIVE | LK_RETRY);
1148 	if (error)
1149 		goto out;
1150 
1151 	/* External relocations */
1152 	error = relocate_file(ef);
1153 	if (error)
1154 		goto out;
1155 
1156 	/* Notify MD code that a module is being loaded. */
1157 	error = elf_cpu_load_file(lf);
1158 	if (error)
1159 		goto out;
1160 
1161 #if defined(__i386__) || defined(__amd64__)
1162 	/* Now ifuncs. */
1163 	error = link_elf_reloc_local(lf, true);
1164 	if (error != 0)
1165 		goto out;
1166 #endif
1167 
1168 	link_elf_protect(ef);
1169 	link_elf_invoke_ctors(lf->ctors_addr, lf->ctors_size);
1170 	*result = lf;
1171 
1172 out:
1173 	VOP_UNLOCK(nd->ni_vp);
1174 	vn_close(nd->ni_vp, FREAD, td->td_ucred, td);
1175 	free(nd, M_TEMP);
1176 	if (error && lf)
1177 		linker_file_unload(lf, LINKER_UNLOAD_FORCE);
1178 	free(hdr, M_LINKER);
1179 
1180 	return error;
1181 }
1182 
1183 static void
1184 link_elf_unload_file(linker_file_t file)
1185 {
1186 	elf_file_t ef = (elf_file_t) file;
1187 	u_int i;
1188 
1189 	/* Notify MD code that a module is being unloaded. */
1190 	elf_cpu_unload_file(file);
1191 
1192 	if (ef->progtab) {
1193 		for (i = 0; i < ef->nprogtab; i++) {
1194 			if (ef->progtab[i].size == 0)
1195 				continue;
1196 			if (ef->progtab[i].name == NULL)
1197 				continue;
1198 			if (!strcmp(ef->progtab[i].name, DPCPU_SETNAME))
1199 				dpcpu_free(ef->progtab[i].addr,
1200 				    ef->progtab[i].size);
1201 #ifdef VIMAGE
1202 			else if (!strcmp(ef->progtab[i].name, VNET_SETNAME))
1203 				vnet_data_free(ef->progtab[i].addr,
1204 				    ef->progtab[i].size);
1205 #endif
1206 		}
1207 	}
1208 	if (ef->preloaded) {
1209 		free(ef->reltab, M_LINKER);
1210 		free(ef->relatab, M_LINKER);
1211 		free(ef->progtab, M_LINKER);
1212 		free(ef->ctftab, M_LINKER);
1213 		free(ef->ctfoff, M_LINKER);
1214 		free(ef->typoff, M_LINKER);
1215 		if (file->pathname != NULL)
1216 			preload_delete_name(file->pathname);
1217 		return;
1218 	}
1219 
1220 	for (i = 0; i < ef->nreltab; i++)
1221 		free(ef->reltab[i].rel, M_LINKER);
1222 	for (i = 0; i < ef->nrelatab; i++)
1223 		free(ef->relatab[i].rela, M_LINKER);
1224 	free(ef->reltab, M_LINKER);
1225 	free(ef->relatab, M_LINKER);
1226 	free(ef->progtab, M_LINKER);
1227 
1228 	if (ef->object != NULL)
1229 		vm_map_remove(kernel_map, (vm_offset_t)ef->address,
1230 		    (vm_offset_t)ef->address + ptoa(ef->object->size));
1231 	free(ef->e_shdr, M_LINKER);
1232 	free(ef->ddbsymtab, M_LINKER);
1233 	free(ef->ddbstrtab, M_LINKER);
1234 	free(ef->shstrtab, M_LINKER);
1235 	free(ef->ctftab, M_LINKER);
1236 	free(ef->ctfoff, M_LINKER);
1237 	free(ef->typoff, M_LINKER);
1238 }
1239 
1240 static const char *
1241 symbol_name(elf_file_t ef, Elf_Size r_info)
1242 {
1243 	const Elf_Sym *ref;
1244 
1245 	if (ELF_R_SYM(r_info)) {
1246 		ref = ef->ddbsymtab + ELF_R_SYM(r_info);
1247 		return ef->ddbstrtab + ref->st_name;
1248 	} else
1249 		return NULL;
1250 }
1251 
1252 static Elf_Addr
1253 findbase(elf_file_t ef, int sec)
1254 {
1255 	int i;
1256 	Elf_Addr base = 0;
1257 
1258 	for (i = 0; i < ef->nprogtab; i++) {
1259 		if (sec == ef->progtab[i].sec) {
1260 			base = (Elf_Addr)ef->progtab[i].addr;
1261 			break;
1262 		}
1263 	}
1264 	return base;
1265 }
1266 
1267 static int
1268 relocate_file(elf_file_t ef)
1269 {
1270 	const Elf_Rel *rellim;
1271 	const Elf_Rel *rel;
1272 	const Elf_Rela *relalim;
1273 	const Elf_Rela *rela;
1274 	const char *symname;
1275 	const Elf_Sym *sym;
1276 	int i;
1277 	Elf_Size symidx;
1278 	Elf_Addr base;
1279 
1280 	/* Perform relocations without addend if there are any: */
1281 	for (i = 0; i < ef->nreltab; i++) {
1282 		rel = ef->reltab[i].rel;
1283 		if (rel == NULL) {
1284 			link_elf_error(ef->lf.filename, "lost a reltab!");
1285 			return (ENOEXEC);
1286 		}
1287 		rellim = rel + ef->reltab[i].nrel;
1288 		base = findbase(ef, ef->reltab[i].sec);
1289 		if (base == 0) {
1290 			link_elf_error(ef->lf.filename, "lost base for reltab");
1291 			return (ENOEXEC);
1292 		}
1293 		for ( ; rel < rellim; rel++) {
1294 			symidx = ELF_R_SYM(rel->r_info);
1295 			if (symidx >= ef->ddbsymcnt)
1296 				continue;
1297 			sym = ef->ddbsymtab + symidx;
1298 			/* Local relocs are already done */
1299 			if (ELF_ST_BIND(sym->st_info) == STB_LOCAL)
1300 				continue;
1301 			if (elf_reloc(&ef->lf, base, rel, ELF_RELOC_REL,
1302 			    elf_obj_lookup)) {
1303 				symname = symbol_name(ef, rel->r_info);
1304 				printf("link_elf_obj: symbol %s undefined\n",
1305 				    symname);
1306 				return (ENOENT);
1307 			}
1308 		}
1309 	}
1310 
1311 	/* Perform relocations with addend if there are any: */
1312 	for (i = 0; i < ef->nrelatab; i++) {
1313 		rela = ef->relatab[i].rela;
1314 		if (rela == NULL) {
1315 			link_elf_error(ef->lf.filename, "lost a relatab!");
1316 			return (ENOEXEC);
1317 		}
1318 		relalim = rela + ef->relatab[i].nrela;
1319 		base = findbase(ef, ef->relatab[i].sec);
1320 		if (base == 0) {
1321 			link_elf_error(ef->lf.filename,
1322 			    "lost base for relatab");
1323 			return (ENOEXEC);
1324 		}
1325 		for ( ; rela < relalim; rela++) {
1326 			symidx = ELF_R_SYM(rela->r_info);
1327 			if (symidx >= ef->ddbsymcnt)
1328 				continue;
1329 			sym = ef->ddbsymtab + symidx;
1330 			/* Local relocs are already done */
1331 			if (ELF_ST_BIND(sym->st_info) == STB_LOCAL)
1332 				continue;
1333 			if (elf_reloc(&ef->lf, base, rela, ELF_RELOC_RELA,
1334 			    elf_obj_lookup)) {
1335 				symname = symbol_name(ef, rela->r_info);
1336 				printf("link_elf_obj: symbol %s undefined\n",
1337 				    symname);
1338 				return (ENOENT);
1339 			}
1340 		}
1341 	}
1342 
1343 	/*
1344 	 * Only clean SHN_FBSD_CACHED for successful return.  If we
1345 	 * modified symbol table for the object but found an
1346 	 * unresolved symbol, there is no reason to roll back.
1347 	 */
1348 	elf_obj_cleanup_globals_cache(ef);
1349 
1350 	return (0);
1351 }
1352 
1353 static int
1354 link_elf_lookup_symbol(linker_file_t lf, const char *name, c_linker_sym_t *sym)
1355 {
1356 	elf_file_t ef = (elf_file_t) lf;
1357 	const Elf_Sym *symp;
1358 	const char *strp;
1359 	int i;
1360 
1361 	for (i = 0, symp = ef->ddbsymtab; i < ef->ddbsymcnt; i++, symp++) {
1362 		strp = ef->ddbstrtab + symp->st_name;
1363 		if (symp->st_shndx != SHN_UNDEF && strcmp(name, strp) == 0) {
1364 			*sym = (c_linker_sym_t) symp;
1365 			return 0;
1366 		}
1367 	}
1368 	return ENOENT;
1369 }
1370 
1371 static int
1372 link_elf_symbol_values(linker_file_t lf, c_linker_sym_t sym,
1373     linker_symval_t *symval)
1374 {
1375 	elf_file_t ef;
1376 	const Elf_Sym *es;
1377 	caddr_t val;
1378 
1379 	ef = (elf_file_t) lf;
1380 	es = (const Elf_Sym*) sym;
1381 	val = (caddr_t)es->st_value;
1382 	if (es >= ef->ddbsymtab && es < (ef->ddbsymtab + ef->ddbsymcnt)) {
1383 		symval->name = ef->ddbstrtab + es->st_name;
1384 		val = (caddr_t)es->st_value;
1385 		if (ELF_ST_TYPE(es->st_info) == STT_GNU_IFUNC)
1386 			val = ((caddr_t (*)(void))val)();
1387 		symval->value = val;
1388 		symval->size = es->st_size;
1389 		return 0;
1390 	}
1391 	return ENOENT;
1392 }
1393 
1394 static int
1395 link_elf_search_symbol(linker_file_t lf, caddr_t value,
1396     c_linker_sym_t *sym, long *diffp)
1397 {
1398 	elf_file_t ef = (elf_file_t) lf;
1399 	u_long off = (uintptr_t) (void *) value;
1400 	u_long diff = off;
1401 	u_long st_value;
1402 	const Elf_Sym *es;
1403 	const Elf_Sym *best = NULL;
1404 	int i;
1405 
1406 	for (i = 0, es = ef->ddbsymtab; i < ef->ddbsymcnt; i++, es++) {
1407 		if (es->st_name == 0)
1408 			continue;
1409 		st_value = es->st_value;
1410 		if (off >= st_value) {
1411 			if (off - st_value < diff) {
1412 				diff = off - st_value;
1413 				best = es;
1414 				if (diff == 0)
1415 					break;
1416 			} else if (off - st_value == diff) {
1417 				best = es;
1418 			}
1419 		}
1420 	}
1421 	if (best == NULL)
1422 		*diffp = off;
1423 	else
1424 		*diffp = diff;
1425 	*sym = (c_linker_sym_t) best;
1426 
1427 	return 0;
1428 }
1429 
1430 /*
1431  * Look up a linker set on an ELF system.
1432  */
1433 static int
1434 link_elf_lookup_set(linker_file_t lf, const char *name,
1435     void ***startp, void ***stopp, int *countp)
1436 {
1437 	elf_file_t ef = (elf_file_t)lf;
1438 	void **start, **stop;
1439 	int i, count;
1440 
1441 	/* Relative to section number */
1442 	for (i = 0; i < ef->nprogtab; i++) {
1443 		if ((strncmp(ef->progtab[i].name, "set_", 4) == 0) &&
1444 		    strcmp(ef->progtab[i].name + 4, name) == 0) {
1445 			start  = (void **)ef->progtab[i].addr;
1446 			stop = (void **)((char *)ef->progtab[i].addr +
1447 			    ef->progtab[i].size);
1448 			count = stop - start;
1449 			if (startp)
1450 				*startp = start;
1451 			if (stopp)
1452 				*stopp = stop;
1453 			if (countp)
1454 				*countp = count;
1455 			return (0);
1456 		}
1457 	}
1458 	return (ESRCH);
1459 }
1460 
1461 static int
1462 link_elf_each_function_name(linker_file_t file,
1463     int (*callback)(const char *, void *), void *opaque)
1464 {
1465 	elf_file_t ef = (elf_file_t)file;
1466 	const Elf_Sym *symp;
1467 	int i, error;
1468 
1469 	/* Exhaustive search */
1470 	for (i = 0, symp = ef->ddbsymtab; i < ef->ddbsymcnt; i++, symp++) {
1471 		if (symp->st_value != 0 &&
1472 		    (ELF_ST_TYPE(symp->st_info) == STT_FUNC ||
1473 		    ELF_ST_TYPE(symp->st_info) == STT_GNU_IFUNC)) {
1474 			error = callback(ef->ddbstrtab + symp->st_name, opaque);
1475 			if (error)
1476 				return (error);
1477 		}
1478 	}
1479 	return (0);
1480 }
1481 
1482 static int
1483 link_elf_each_function_nameval(linker_file_t file,
1484     linker_function_nameval_callback_t callback, void *opaque)
1485 {
1486 	linker_symval_t symval;
1487 	elf_file_t ef = (elf_file_t)file;
1488 	const Elf_Sym* symp;
1489 	int i, error;
1490 
1491 	/* Exhaustive search */
1492 	for (i = 0, symp = ef->ddbsymtab; i < ef->ddbsymcnt; i++, symp++) {
1493 		if (symp->st_value != 0 &&
1494 		    (ELF_ST_TYPE(symp->st_info) == STT_FUNC ||
1495 		    ELF_ST_TYPE(symp->st_info) == STT_GNU_IFUNC)) {
1496 			error = link_elf_symbol_values(file,
1497 			    (c_linker_sym_t)symp, &symval);
1498 			if (error)
1499 				return (error);
1500 			error = callback(file, i, &symval, opaque);
1501 			if (error)
1502 				return (error);
1503 		}
1504 	}
1505 	return (0);
1506 }
1507 
1508 static void
1509 elf_obj_cleanup_globals_cache(elf_file_t ef)
1510 {
1511 	Elf_Sym *sym;
1512 	Elf_Size i;
1513 
1514 	for (i = 0; i < ef->ddbsymcnt; i++) {
1515 		sym = ef->ddbsymtab + i;
1516 		if (sym->st_shndx == SHN_FBSD_CACHED) {
1517 			sym->st_shndx = SHN_UNDEF;
1518 			sym->st_value = 0;
1519 		}
1520 	}
1521 }
1522 
1523 /*
1524  * Symbol lookup function that can be used when the symbol index is known (ie
1525  * in relocations). It uses the symbol index instead of doing a fully fledged
1526  * hash table based lookup when such is valid. For example for local symbols.
1527  * This is not only more efficient, it's also more correct. It's not always
1528  * the case that the symbol can be found through the hash table.
1529  */
1530 static int
1531 elf_obj_lookup(linker_file_t lf, Elf_Size symidx, int deps, Elf_Addr *res)
1532 {
1533 	elf_file_t ef = (elf_file_t)lf;
1534 	Elf_Sym *sym;
1535 	const char *symbol;
1536 	Elf_Addr res1;
1537 
1538 	/* Don't even try to lookup the symbol if the index is bogus. */
1539 	if (symidx >= ef->ddbsymcnt) {
1540 		*res = 0;
1541 		return (EINVAL);
1542 	}
1543 
1544 	sym = ef->ddbsymtab + symidx;
1545 
1546 	/* Quick answer if there is a definition included. */
1547 	if (sym->st_shndx != SHN_UNDEF) {
1548 		res1 = (Elf_Addr)sym->st_value;
1549 		if (ELF_ST_TYPE(sym->st_info) == STT_GNU_IFUNC)
1550 			res1 = ((Elf_Addr (*)(void))res1)();
1551 		*res = res1;
1552 		return (0);
1553 	}
1554 
1555 	/* If we get here, then it is undefined and needs a lookup. */
1556 	switch (ELF_ST_BIND(sym->st_info)) {
1557 	case STB_LOCAL:
1558 		/* Local, but undefined? huh? */
1559 		*res = 0;
1560 		return (EINVAL);
1561 
1562 	case STB_GLOBAL:
1563 	case STB_WEAK:
1564 		/* Relative to Data or Function name */
1565 		symbol = ef->ddbstrtab + sym->st_name;
1566 
1567 		/* Force a lookup failure if the symbol name is bogus. */
1568 		if (*symbol == 0) {
1569 			*res = 0;
1570 			return (EINVAL);
1571 		}
1572 		res1 = (Elf_Addr)linker_file_lookup_symbol(lf, symbol, deps);
1573 
1574 		/*
1575 		 * Cache global lookups during module relocation. The failure
1576 		 * case is particularly expensive for callers, who must scan
1577 		 * through the entire globals table doing strcmp(). Cache to
1578 		 * avoid doing such work repeatedly.
1579 		 *
1580 		 * After relocation is complete, undefined globals will be
1581 		 * restored to SHN_UNDEF in elf_obj_cleanup_globals_cache(),
1582 		 * above.
1583 		 */
1584 		if (res1 != 0) {
1585 			sym->st_shndx = SHN_FBSD_CACHED;
1586 			sym->st_value = res1;
1587 			*res = res1;
1588 			return (0);
1589 		} else if (ELF_ST_BIND(sym->st_info) == STB_WEAK) {
1590 			sym->st_value = 0;
1591 			*res = 0;
1592 			return (0);
1593 		}
1594 		return (EINVAL);
1595 
1596 	default:
1597 		return (EINVAL);
1598 	}
1599 }
1600 
1601 static void
1602 link_elf_fix_link_set(elf_file_t ef)
1603 {
1604 	static const char startn[] = "__start_";
1605 	static const char stopn[] = "__stop_";
1606 	Elf_Sym *sym;
1607 	const char *sym_name, *linkset_name;
1608 	Elf_Addr startp, stopp;
1609 	Elf_Size symidx;
1610 	int start, i;
1611 
1612 	startp = stopp = 0;
1613 	for (symidx = 1 /* zero entry is special */;
1614 		symidx < ef->ddbsymcnt; symidx++) {
1615 		sym = ef->ddbsymtab + symidx;
1616 		if (sym->st_shndx != SHN_UNDEF)
1617 			continue;
1618 
1619 		sym_name = ef->ddbstrtab + sym->st_name;
1620 		if (strncmp(sym_name, startn, sizeof(startn) - 1) == 0) {
1621 			start = 1;
1622 			linkset_name = sym_name + sizeof(startn) - 1;
1623 		}
1624 		else if (strncmp(sym_name, stopn, sizeof(stopn) - 1) == 0) {
1625 			start = 0;
1626 			linkset_name = sym_name + sizeof(stopn) - 1;
1627 		}
1628 		else
1629 			continue;
1630 
1631 		for (i = 0; i < ef->nprogtab; i++) {
1632 			if (strcmp(ef->progtab[i].name, linkset_name) == 0) {
1633 				startp = (Elf_Addr)ef->progtab[i].addr;
1634 				stopp = (Elf_Addr)(startp + ef->progtab[i].size);
1635 				break;
1636 			}
1637 		}
1638 		if (i == ef->nprogtab)
1639 			continue;
1640 
1641 		sym->st_value = start ? startp : stopp;
1642 		sym->st_shndx = i;
1643 	}
1644 }
1645 
1646 static int
1647 link_elf_reloc_local(linker_file_t lf, bool ifuncs)
1648 {
1649 	elf_file_t ef = (elf_file_t)lf;
1650 	const Elf_Rel *rellim;
1651 	const Elf_Rel *rel;
1652 	const Elf_Rela *relalim;
1653 	const Elf_Rela *rela;
1654 	const Elf_Sym *sym;
1655 	Elf_Addr base;
1656 	int i;
1657 	Elf_Size symidx;
1658 
1659 	link_elf_fix_link_set(ef);
1660 
1661 	/* Perform relocations without addend if there are any: */
1662 	for (i = 0; i < ef->nreltab; i++) {
1663 		rel = ef->reltab[i].rel;
1664 		if (rel == NULL) {
1665 			link_elf_error(ef->lf.filename, "lost a reltab");
1666 			return (ENOEXEC);
1667 		}
1668 		rellim = rel + ef->reltab[i].nrel;
1669 		base = findbase(ef, ef->reltab[i].sec);
1670 		if (base == 0) {
1671 			link_elf_error(ef->lf.filename, "lost base for reltab");
1672 			return (ENOEXEC);
1673 		}
1674 		for ( ; rel < rellim; rel++) {
1675 			symidx = ELF_R_SYM(rel->r_info);
1676 			if (symidx >= ef->ddbsymcnt)
1677 				continue;
1678 			sym = ef->ddbsymtab + symidx;
1679 			/* Only do local relocs */
1680 			if (ELF_ST_BIND(sym->st_info) != STB_LOCAL)
1681 				continue;
1682 			if ((ELF_ST_TYPE(sym->st_info) == STT_GNU_IFUNC ||
1683 			    elf_is_ifunc_reloc(rel->r_info)) != ifuncs)
1684 				continue;
1685 			if (elf_reloc_local(lf, base, rel, ELF_RELOC_REL,
1686 			    elf_obj_lookup) != 0)
1687 				return (ENOEXEC);
1688 		}
1689 	}
1690 
1691 	/* Perform relocations with addend if there are any: */
1692 	for (i = 0; i < ef->nrelatab; i++) {
1693 		rela = ef->relatab[i].rela;
1694 		if (rela == NULL) {
1695 			link_elf_error(ef->lf.filename, "lost a relatab!");
1696 			return (ENOEXEC);
1697 		}
1698 		relalim = rela + ef->relatab[i].nrela;
1699 		base = findbase(ef, ef->relatab[i].sec);
1700 		if (base == 0) {
1701 			link_elf_error(ef->lf.filename, "lost base for reltab");
1702 			return (ENOEXEC);
1703 		}
1704 		for ( ; rela < relalim; rela++) {
1705 			symidx = ELF_R_SYM(rela->r_info);
1706 			if (symidx >= ef->ddbsymcnt)
1707 				continue;
1708 			sym = ef->ddbsymtab + symidx;
1709 			/* Only do local relocs */
1710 			if (ELF_ST_BIND(sym->st_info) != STB_LOCAL)
1711 				continue;
1712 			if ((ELF_ST_TYPE(sym->st_info) == STT_GNU_IFUNC ||
1713 			    elf_is_ifunc_reloc(rela->r_info)) != ifuncs)
1714 				continue;
1715 			if (elf_reloc_local(lf, base, rela, ELF_RELOC_RELA,
1716 			    elf_obj_lookup) != 0)
1717 				return (ENOEXEC);
1718 		}
1719 	}
1720 	return (0);
1721 }
1722 
1723 static long
1724 link_elf_symtab_get(linker_file_t lf, const Elf_Sym **symtab)
1725 {
1726     elf_file_t ef = (elf_file_t)lf;
1727 
1728     *symtab = ef->ddbsymtab;
1729 
1730     if (*symtab == NULL)
1731         return (0);
1732 
1733     return (ef->ddbsymcnt);
1734 }
1735 
1736 static long
1737 link_elf_strtab_get(linker_file_t lf, caddr_t *strtab)
1738 {
1739     elf_file_t ef = (elf_file_t)lf;
1740 
1741     *strtab = ef->ddbstrtab;
1742 
1743     if (*strtab == NULL)
1744         return (0);
1745 
1746     return (ef->ddbstrcnt);
1747 }
1748