xref: /freebsd/sys/kern/link_elf_obj.c (revision 7e1d3eefd410ca0fbae5a217422821244c3eeee4)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 1998-2000 Doug Rabson
5  * Copyright (c) 2004 Peter Wemm
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include "opt_ddb.h"
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/fcntl.h>
38 #include <sys/kernel.h>
39 #include <sys/lock.h>
40 #include <sys/malloc.h>
41 #include <sys/linker.h>
42 #include <sys/mutex.h>
43 #include <sys/mount.h>
44 #include <sys/namei.h>
45 #include <sys/proc.h>
46 #include <sys/rwlock.h>
47 #include <sys/vnode.h>
48 
49 #include <machine/elf.h>
50 
51 #include <net/vnet.h>
52 
53 #include <security/mac/mac_framework.h>
54 
55 #include <vm/vm.h>
56 #include <vm/vm_param.h>
57 #include <vm/pmap.h>
58 #include <vm/vm_extern.h>
59 #include <vm/vm_kern.h>
60 #include <vm/vm_map.h>
61 #include <vm/vm_object.h>
62 #include <vm/vm_page.h>
63 #include <vm/vm_pager.h>
64 
65 #include <sys/link_elf.h>
66 
67 #ifdef DDB_CTF
68 #include <contrib/zlib/zlib.h>
69 #endif
70 
71 #include "linker_if.h"
72 
/*
 * One entry of a file's progtab: a section whose contents are tracked by
 * the linker (PROGBITS, NOBITS and the other types the loaders accept).
 */
typedef struct {
	void		*addr;	/* Address of the section's contents. */
	Elf_Off		size;	/* Size in bytes, taken from sh_size. */
	int		flags;	/* Section flags. */
	int		sec;	/* Original section number. */
	char		*name;	/* Section name or a "<<TYPE>>" placeholder. */
} Elf_progent;
80 
/*
 * One entry of a file's reltab: an SHT_REL relocation table.
 */
typedef struct {
	Elf_Rel		*rel;	/* Relocation records. */
	int		nrel;	/* Number of records in rel. */
	int		sec;	/* Target section index (from sh_info). */
} Elf_relent;
86 
/*
 * One entry of a file's relatab: an SHT_RELA relocation table.
 */
typedef struct {
	Elf_Rela	*rela;	/* Relocation records. */
	int		nrela;	/* Number of records in rela. */
	int		sec;	/* Target section index (from sh_info). */
} Elf_relaent;
92 
/*
 * Per-file state of this linker class.  Code in this file casts a
 * linker_file_t to elf_file_t, so "lf" is the first member.
 */
typedef struct elf_file {
	struct linker_file lf;		/* Common fields */

	int		preloaded;	/* Set for files set up by link_elf_link_preload() */
	caddr_t		address;	/* Relocation address */
	vm_object_t	object;		/* VM object to hold file pages */
	Elf_Shdr	*e_shdr;	/* Section header table */

	Elf_progent	*progtab;	/* Tracked sections */
	u_int		nprogtab;	/* Number of progtab entries */

	Elf_relaent	*relatab;	/* SHT_RELA tables */
	u_int		nrelatab;	/* Number of relatab entries */

	Elf_relent	*reltab;	/* SHT_REL tables */
	int		nreltab;	/* Number of reltab entries */

	Elf_Sym		*ddbsymtab;	/* The symbol table we are using */
	long		ddbsymcnt;	/* Number of symbols */
	caddr_t		ddbstrtab;	/* String table */
	long		ddbstrcnt;	/* number of bytes in string table */

	caddr_t		shstrtab;	/* Section name string table */
	long		shstrcnt;	/* number of bytes in string table */

	caddr_t		ctftab;		/* CTF table */
	long		ctfcnt;		/* number of bytes in CTF table */
	caddr_t		ctfoff;		/* CTF offset table */
	caddr_t		typoff;		/* Type offset table */
	long		typlen;		/* Number of type entries. */

} *elf_file_t;
125 
126 #include <kern/kern_ctf.c>
127 
/*
 * Forward declarations for the functions installed in link_elf_methods
 * below.
 */
static int	link_elf_link_preload(linker_class_t cls,
		    const char *, linker_file_t *);
static int	link_elf_link_preload_finish(linker_file_t);
static int	link_elf_load_file(linker_class_t, const char *, linker_file_t *);
static int	link_elf_lookup_symbol(linker_file_t, const char *,
		    c_linker_sym_t *);
static int	link_elf_symbol_values(linker_file_t, c_linker_sym_t,
		    linker_symval_t *);
static int	link_elf_search_symbol(linker_file_t, caddr_t value,
		    c_linker_sym_t *sym, long *diffp);

static void	link_elf_unload_file(linker_file_t);
static int	link_elf_lookup_set(linker_file_t, const char *,
		    void ***, void ***, int *);
static int	link_elf_each_function_name(linker_file_t,
		    int (*)(const char *, void *), void *);
static int	link_elf_each_function_nameval(linker_file_t,
				linker_function_nameval_callback_t,
				void *);
/* The bool selects the ifunc pass; see the callers in this file. */
static int	link_elf_reloc_local(linker_file_t, bool);
static long	link_elf_symtab_get(linker_file_t, const Elf_Sym **);
static long	link_elf_strtab_get(linker_file_t, caddr_t *);

/* Not a linker method; a file-local symbol lookup helper. */
static int	elf_obj_lookup(linker_file_t lf, Elf_Size symidx, int deps,
		    Elf_Addr *);
153 
/*
 * kobj method table binding the linker interface methods to this file's
 * implementations.
 */
static kobj_method_t link_elf_methods[] = {
	KOBJMETHOD(linker_lookup_symbol,	link_elf_lookup_symbol),
	KOBJMETHOD(linker_symbol_values,	link_elf_symbol_values),
	KOBJMETHOD(linker_search_symbol,	link_elf_search_symbol),
	KOBJMETHOD(linker_unload,		link_elf_unload_file),
	KOBJMETHOD(linker_load_file,		link_elf_load_file),
	KOBJMETHOD(linker_link_preload,		link_elf_link_preload),
	KOBJMETHOD(linker_link_preload_finish,	link_elf_link_preload_finish),
	KOBJMETHOD(linker_lookup_set,		link_elf_lookup_set),
	KOBJMETHOD(linker_each_function_name,	link_elf_each_function_name),
	KOBJMETHOD(linker_each_function_nameval, link_elf_each_function_nameval),
	KOBJMETHOD(linker_ctf_get,		link_elf_ctf_get),
	KOBJMETHOD(linker_symtab_get, 		link_elf_symtab_get),
	KOBJMETHOD(linker_strtab_get, 		link_elf_strtab_get),
	KOBJMETHOD_END
};
170 
/*
 * Linker class descriptor: a class name chosen by the target ELF class,
 * the method table, and the size of the per-file object (struct elf_file).
 */
static struct linker_class link_elf_class = {
#if ELF_TARG_CLASS == ELFCLASS32
	"elf32_obj",
#else
	"elf64_obj",
#endif
	link_elf_methods, sizeof(struct elf_file)
};
179 
/* File-local helpers defined later in this file. */
static int	relocate_file(elf_file_t ef);
static void	elf_obj_cleanup_globals_cache(elf_file_t);
182 
/*
 * Print a "kldload:" diagnostic.  The file name is included in the message
 * only when the caller supplies one.
 */
static void
link_elf_error(const char *filename, const char *s)
{
	if (filename != NULL) {
		printf("kldload: %s: %s\n", filename, s);
		return;
	}
	printf("kldload: %s\n", s);
}
191 
192 static void
193 link_elf_init(void *arg)
194 {
195 
196 	linker_add_class(&link_elf_class);
197 }
198 SYSINIT(link_elf_obj, SI_SUB_KLD, SI_ORDER_SECOND, link_elf_init, NULL);
199 
200 static void
201 link_elf_protect_range(elf_file_t ef, vm_offset_t start, vm_offset_t end,
202     vm_prot_t prot)
203 {
204 	int error __unused;
205 
206 	KASSERT(start <= end && start >= (vm_offset_t)ef->address &&
207 	    end <= round_page((vm_offset_t)ef->address + ef->lf.size),
208 	    ("link_elf_protect_range: invalid range %#jx-%#jx",
209 	    (uintmax_t)start, (uintmax_t)end));
210 
211 	if (start == end)
212 		return;
213 	if (ef->preloaded) {
214 #ifdef __amd64__
215 		error = pmap_change_prot(start, end - start, prot);
216 		KASSERT(error == 0,
217 		    ("link_elf_protect_range: pmap_change_prot() returned %d",
218 		    error));
219 #endif
220 		return;
221 	}
222 	error = vm_map_protect(kernel_map, start, end, prot, 0,
223 	    VM_MAP_PROTECT_SET_PROT);
224 	KASSERT(error == KERN_SUCCESS,
225 	    ("link_elf_protect_range: vm_map_protect() returned %d", error));
226 }
227 
228 /*
229  * Restrict permissions on linker file memory based on section flags.
230  * Sections need not be page-aligned, so overlap within a page is possible.
231  */
232 static void
233 link_elf_protect(elf_file_t ef)
234 {
235 	vm_offset_t end, segend, segstart, start;
236 	vm_prot_t gapprot, prot, segprot;
237 	int i;
238 
239 	/*
240 	 * If the file was preloaded, the last page may contain other preloaded
241 	 * data which may need to be writeable.  ELF files are always
242 	 * page-aligned, but other preloaded data, such as entropy or CPU
243 	 * microcode may be loaded with a smaller alignment.
244 	 */
245 	gapprot = ef->preloaded ? VM_PROT_RW : VM_PROT_READ;
246 
247 	start = end = (vm_offset_t)ef->address;
248 	prot = VM_PROT_READ;
249 	for (i = 0; i < ef->nprogtab; i++) {
250 		/*
251 		 * VNET and DPCPU sections have their memory allocated by their
252 		 * respective subsystems.
253 		 */
254 		if (ef->progtab[i].name != NULL && (
255 #ifdef VIMAGE
256 		    strcmp(ef->progtab[i].name, VNET_SETNAME) == 0 ||
257 #endif
258 		    strcmp(ef->progtab[i].name, DPCPU_SETNAME) == 0))
259 			continue;
260 
261 		segstart = trunc_page((vm_offset_t)ef->progtab[i].addr);
262 		segend = round_page((vm_offset_t)ef->progtab[i].addr +
263 		    ef->progtab[i].size);
264 		segprot = VM_PROT_READ;
265 		if ((ef->progtab[i].flags & SHF_WRITE) != 0)
266 			segprot |= VM_PROT_WRITE;
267 		if ((ef->progtab[i].flags & SHF_EXECINSTR) != 0)
268 			segprot |= VM_PROT_EXECUTE;
269 
270 		if (end <= segstart) {
271 			/*
272 			 * Case 1: there is no overlap between the previous
273 			 * segment and this one.  Apply protections to the
274 			 * previous segment, and protect the gap between the
275 			 * previous and current segments, if any.
276 			 */
277 			link_elf_protect_range(ef, start, end, prot);
278 			link_elf_protect_range(ef, end, segstart, gapprot);
279 
280 			start = segstart;
281 			end = segend;
282 			prot = segprot;
283 		} else if (start < segstart && end == segend) {
284 			/*
285 			 * Case 2: the current segment is a subrange of the
286 			 * previous segment.  Apply protections to the
287 			 * non-overlapping portion of the previous segment.
288 			 */
289 			link_elf_protect_range(ef, start, segstart, prot);
290 
291 			start = segstart;
292 			prot |= segprot;
293 		} else if (end < segend) {
294 			/*
295 			 * Case 3: there is partial overlap between the previous
296 			 * and current segments.  Apply protections to the
297 			 * non-overlapping portion of the previous segment, and
298 			 * then the overlap, which must use the union of the two
299 			 * segments' protections.
300 			 */
301 			link_elf_protect_range(ef, start, segstart, prot);
302 			link_elf_protect_range(ef, segstart, end,
303 			    prot | segprot);
304 			start = end;
305 			end = segend;
306 			prot = segprot;
307 		} else {
308 			/*
309 			 * Case 4: the two segments reside in the same page.
310 			 */
311 			prot |= segprot;
312 		}
313 	}
314 
315 	/*
316 	 * Fix up the last unprotected segment and trailing data.
317 	 */
318 	link_elf_protect_range(ef, start, end, prot);
319 	link_elf_protect_range(ef, end,
320 	    round_page((vm_offset_t)ef->address + ef->lf.size), gapprot);
321 }
322 
323 static int
324 link_elf_link_preload(linker_class_t cls, const char *filename,
325     linker_file_t *result)
326 {
327 	Elf_Ehdr *hdr;
328 	Elf_Shdr *shdr;
329 	Elf_Sym *es;
330 	void *modptr, *baseptr, *sizeptr;
331 	char *type;
332 	elf_file_t ef;
333 	linker_file_t lf;
334 	Elf_Addr off;
335 	int error, i, j, pb, ra, rl, shstrindex, symstrindex, symtabindex;
336 
337 	/* Look to see if we have the file preloaded */
338 	modptr = preload_search_by_name(filename);
339 	if (modptr == NULL)
340 		return ENOENT;
341 
342 	type = (char *)preload_search_info(modptr, MODINFO_TYPE);
343 	baseptr = preload_search_info(modptr, MODINFO_ADDR);
344 	sizeptr = preload_search_info(modptr, MODINFO_SIZE);
345 	hdr = (Elf_Ehdr *)preload_search_info(modptr, MODINFO_METADATA |
346 	    MODINFOMD_ELFHDR);
347 	shdr = (Elf_Shdr *)preload_search_info(modptr, MODINFO_METADATA |
348 	    MODINFOMD_SHDR);
349 	if (type == NULL || (strcmp(type, "elf" __XSTRING(__ELF_WORD_SIZE)
350 	    " obj module") != 0 &&
351 	    strcmp(type, "elf obj module") != 0)) {
352 		return (EFTYPE);
353 	}
354 	if (baseptr == NULL || sizeptr == NULL || hdr == NULL ||
355 	    shdr == NULL)
356 		return (EINVAL);
357 
358 	lf = linker_make_file(filename, &link_elf_class);
359 	if (lf == NULL)
360 		return (ENOMEM);
361 
362 	ef = (elf_file_t)lf;
363 	ef->preloaded = 1;
364 	ef->address = *(caddr_t *)baseptr;
365 	lf->address = *(caddr_t *)baseptr;
366 	lf->size = *(size_t *)sizeptr;
367 
368 	if (hdr->e_ident[EI_CLASS] != ELF_TARG_CLASS ||
369 	    hdr->e_ident[EI_DATA] != ELF_TARG_DATA ||
370 	    hdr->e_ident[EI_VERSION] != EV_CURRENT ||
371 	    hdr->e_version != EV_CURRENT ||
372 	    hdr->e_type != ET_REL ||
373 	    hdr->e_machine != ELF_TARG_MACH) {
374 		error = EFTYPE;
375 		goto out;
376 	}
377 	ef->e_shdr = shdr;
378 
379 	/* Scan the section header for information and table sizing. */
380 	symtabindex = -1;
381 	symstrindex = -1;
382 	for (i = 0; i < hdr->e_shnum; i++) {
383 		switch (shdr[i].sh_type) {
384 		case SHT_PROGBITS:
385 		case SHT_NOBITS:
386 #ifdef __amd64__
387 		case SHT_X86_64_UNWIND:
388 #endif
389 		case SHT_INIT_ARRAY:
390 		case SHT_FINI_ARRAY:
391 			/* Ignore sections not loaded by the loader. */
392 			if (shdr[i].sh_addr == 0)
393 				break;
394 			ef->nprogtab++;
395 			break;
396 		case SHT_SYMTAB:
397 			symtabindex = i;
398 			symstrindex = shdr[i].sh_link;
399 			break;
400 		case SHT_REL:
401 			/*
402 			 * Ignore relocation tables for sections not
403 			 * loaded by the loader.
404 			 */
405 			if (shdr[shdr[i].sh_info].sh_addr == 0)
406 				break;
407 			ef->nreltab++;
408 			break;
409 		case SHT_RELA:
410 			if (shdr[shdr[i].sh_info].sh_addr == 0)
411 				break;
412 			ef->nrelatab++;
413 			break;
414 		}
415 	}
416 
417 	shstrindex = hdr->e_shstrndx;
418 	if (ef->nprogtab == 0 || symstrindex < 0 ||
419 	    symstrindex >= hdr->e_shnum ||
420 	    shdr[symstrindex].sh_type != SHT_STRTAB || shstrindex == 0 ||
421 	    shstrindex >= hdr->e_shnum ||
422 	    shdr[shstrindex].sh_type != SHT_STRTAB) {
423 		printf("%s: bad/missing section headers\n", filename);
424 		error = ENOEXEC;
425 		goto out;
426 	}
427 
428 	/* Allocate space for tracking the load chunks */
429 	if (ef->nprogtab != 0)
430 		ef->progtab = malloc(ef->nprogtab * sizeof(*ef->progtab),
431 		    M_LINKER, M_WAITOK | M_ZERO);
432 	if (ef->nreltab != 0)
433 		ef->reltab = malloc(ef->nreltab * sizeof(*ef->reltab),
434 		    M_LINKER, M_WAITOK | M_ZERO);
435 	if (ef->nrelatab != 0)
436 		ef->relatab = malloc(ef->nrelatab * sizeof(*ef->relatab),
437 		    M_LINKER, M_WAITOK | M_ZERO);
438 	if ((ef->nprogtab != 0 && ef->progtab == NULL) ||
439 	    (ef->nreltab != 0 && ef->reltab == NULL) ||
440 	    (ef->nrelatab != 0 && ef->relatab == NULL)) {
441 		error = ENOMEM;
442 		goto out;
443 	}
444 
445 	/* XXX, relocate the sh_addr fields saved by the loader. */
446 	off = 0;
447 	for (i = 0; i < hdr->e_shnum; i++) {
448 		if (shdr[i].sh_addr != 0 && (off == 0 || shdr[i].sh_addr < off))
449 			off = shdr[i].sh_addr;
450 	}
451 	for (i = 0; i < hdr->e_shnum; i++) {
452 		if (shdr[i].sh_addr != 0)
453 			shdr[i].sh_addr = shdr[i].sh_addr - off +
454 			    (Elf_Addr)ef->address;
455 	}
456 
457 	ef->ddbsymcnt = shdr[symtabindex].sh_size / sizeof(Elf_Sym);
458 	ef->ddbsymtab = (Elf_Sym *)shdr[symtabindex].sh_addr;
459 	ef->ddbstrcnt = shdr[symstrindex].sh_size;
460 	ef->ddbstrtab = (char *)shdr[symstrindex].sh_addr;
461 	ef->shstrcnt = shdr[shstrindex].sh_size;
462 	ef->shstrtab = (char *)shdr[shstrindex].sh_addr;
463 
464 	/* Now fill out progtab and the relocation tables. */
465 	pb = 0;
466 	rl = 0;
467 	ra = 0;
468 	for (i = 0; i < hdr->e_shnum; i++) {
469 		switch (shdr[i].sh_type) {
470 		case SHT_PROGBITS:
471 		case SHT_NOBITS:
472 #ifdef __amd64__
473 		case SHT_X86_64_UNWIND:
474 #endif
475 		case SHT_INIT_ARRAY:
476 		case SHT_FINI_ARRAY:
477 			if (shdr[i].sh_addr == 0)
478 				break;
479 			ef->progtab[pb].addr = (void *)shdr[i].sh_addr;
480 			if (shdr[i].sh_type == SHT_PROGBITS)
481 				ef->progtab[pb].name = "<<PROGBITS>>";
482 #ifdef __amd64__
483 			else if (shdr[i].sh_type == SHT_X86_64_UNWIND)
484 				ef->progtab[pb].name = "<<UNWIND>>";
485 #endif
486 			else if (shdr[i].sh_type == SHT_INIT_ARRAY)
487 				ef->progtab[pb].name = "<<INIT_ARRAY>>";
488 			else if (shdr[i].sh_type == SHT_FINI_ARRAY)
489 				ef->progtab[pb].name = "<<FINI_ARRAY>>";
490 			else
491 				ef->progtab[pb].name = "<<NOBITS>>";
492 			ef->progtab[pb].size = shdr[i].sh_size;
493 			ef->progtab[pb].flags = shdr[i].sh_flags;
494 			ef->progtab[pb].sec = i;
495 			if (ef->shstrtab && shdr[i].sh_name != 0)
496 				ef->progtab[pb].name =
497 				    ef->shstrtab + shdr[i].sh_name;
498 			if (ef->progtab[pb].name != NULL &&
499 			    !strcmp(ef->progtab[pb].name, DPCPU_SETNAME)) {
500 				void *dpcpu;
501 
502 				dpcpu = dpcpu_alloc(shdr[i].sh_size);
503 				if (dpcpu == NULL) {
504 					printf("%s: pcpu module space is out "
505 					    "of space; cannot allocate %#jx "
506 					    "for %s\n", __func__,
507 					    (uintmax_t)shdr[i].sh_size,
508 					    filename);
509 					error = ENOSPC;
510 					goto out;
511 				}
512 				memcpy(dpcpu, ef->progtab[pb].addr,
513 				    ef->progtab[pb].size);
514 				dpcpu_copy(dpcpu, shdr[i].sh_size);
515 				ef->progtab[pb].addr = dpcpu;
516 #ifdef VIMAGE
517 			} else if (ef->progtab[pb].name != NULL &&
518 			    !strcmp(ef->progtab[pb].name, VNET_SETNAME)) {
519 				void *vnet_data;
520 
521 				vnet_data = vnet_data_alloc(shdr[i].sh_size);
522 				if (vnet_data == NULL) {
523 					printf("%s: vnet module space is out "
524 					    "of space; cannot allocate %#jx "
525 					    "for %s\n", __func__,
526 					    (uintmax_t)shdr[i].sh_size,
527 					    filename);
528 					error = ENOSPC;
529 					goto out;
530 				}
531 				memcpy(vnet_data, ef->progtab[pb].addr,
532 				    ef->progtab[pb].size);
533 				vnet_data_copy(vnet_data, shdr[i].sh_size);
534 				ef->progtab[pb].addr = vnet_data;
535 #endif
536 			} else if ((ef->progtab[pb].name != NULL &&
537 			    strcmp(ef->progtab[pb].name, ".ctors") == 0) ||
538 			    shdr[i].sh_type == SHT_INIT_ARRAY) {
539 				if (lf->ctors_addr != 0) {
540 					printf(
541 				    "%s: multiple ctor sections in %s\n",
542 					    __func__, filename);
543 				} else {
544 					lf->ctors_addr = ef->progtab[pb].addr;
545 					lf->ctors_size = shdr[i].sh_size;
546 				}
547 			} else if ((ef->progtab[pb].name != NULL &&
548 			    strcmp(ef->progtab[pb].name, ".dtors") == 0) ||
549 			    shdr[i].sh_type == SHT_FINI_ARRAY) {
550 				if (lf->dtors_addr != 0) {
551 					printf(
552 				    "%s: multiple dtor sections in %s\n",
553 					    __func__, filename);
554 				} else {
555 					lf->dtors_addr = ef->progtab[pb].addr;
556 					lf->dtors_size = shdr[i].sh_size;
557 				}
558 			}
559 
560 			/* Update all symbol values with the offset. */
561 			for (j = 0; j < ef->ddbsymcnt; j++) {
562 				es = &ef->ddbsymtab[j];
563 				if (es->st_shndx != i)
564 					continue;
565 				es->st_value += (Elf_Addr)ef->progtab[pb].addr;
566 			}
567 			pb++;
568 			break;
569 		case SHT_REL:
570 			if (shdr[shdr[i].sh_info].sh_addr == 0)
571 				break;
572 			ef->reltab[rl].rel = (Elf_Rel *)shdr[i].sh_addr;
573 			ef->reltab[rl].nrel = shdr[i].sh_size / sizeof(Elf_Rel);
574 			ef->reltab[rl].sec = shdr[i].sh_info;
575 			rl++;
576 			break;
577 		case SHT_RELA:
578 			if (shdr[shdr[i].sh_info].sh_addr == 0)
579 				break;
580 			ef->relatab[ra].rela = (Elf_Rela *)shdr[i].sh_addr;
581 			ef->relatab[ra].nrela =
582 			    shdr[i].sh_size / sizeof(Elf_Rela);
583 			ef->relatab[ra].sec = shdr[i].sh_info;
584 			ra++;
585 			break;
586 		}
587 	}
588 	if (pb != ef->nprogtab) {
589 		printf("%s: lost progbits\n", filename);
590 		error = ENOEXEC;
591 		goto out;
592 	}
593 	if (rl != ef->nreltab) {
594 		printf("%s: lost reltab\n", filename);
595 		error = ENOEXEC;
596 		goto out;
597 	}
598 	if (ra != ef->nrelatab) {
599 		printf("%s: lost relatab\n", filename);
600 		error = ENOEXEC;
601 		goto out;
602 	}
603 
604 	/*
605 	 * The file needs to be writeable and executable while applying
606 	 * relocations.  Mapping protections are applied once relocation
607 	 * processing is complete.
608 	 */
609 	link_elf_protect_range(ef, (vm_offset_t)ef->address,
610 	    round_page((vm_offset_t)ef->address + ef->lf.size), VM_PROT_ALL);
611 
612 	/* Local intra-module relocations */
613 	error = link_elf_reloc_local(lf, false);
614 	if (error != 0)
615 		goto out;
616 	*result = lf;
617 	return (0);
618 
619 out:
620 	/* preload not done this way */
621 	linker_file_unload(lf, LINKER_UNLOAD_FORCE);
622 	return (error);
623 }
624 
/*
 * Call every non-NULL entry of a table of "void (*)(void)" callbacks.
 *
 * addr points at the table and size is its length in bytes; a trailing
 * partial entry is ignored.  A NULL addr or a zero size is a no-op.
 */
static void
link_elf_invoke_cbs(caddr_t addr, size_t size)
{
	void (**cb)(void);
	void (**stop)(void);

	if (addr == NULL || size == 0)
		return;

	cb = (void *)addr;
	stop = cb + size / sizeof(*cb);
	for (; cb != stop; cb++) {
		if (*cb == NULL)
			continue;
		(**cb)();
	}
}
640 
641 static int
642 link_elf_link_preload_finish(linker_file_t lf)
643 {
644 	elf_file_t ef;
645 	int error;
646 
647 	ef = (elf_file_t)lf;
648 	error = relocate_file(ef);
649 	if (error)
650 		return (error);
651 
652 	/* Notify MD code that a module is being loaded. */
653 	error = elf_cpu_load_file(lf);
654 	if (error)
655 		return (error);
656 
657 #if defined(__i386__) || defined(__amd64__)
658 	/* Now ifuncs. */
659 	error = link_elf_reloc_local(lf, true);
660 	if (error != 0)
661 		return (error);
662 #endif
663 
664 	/* Apply protections now that relocation processing is complete. */
665 	link_elf_protect(ef);
666 
667 	link_elf_invoke_cbs(lf->ctors_addr, lf->ctors_size);
668 	return (0);
669 }
670 
671 static int
672 link_elf_load_file(linker_class_t cls, const char *filename,
673     linker_file_t *result)
674 {
675 	struct nameidata *nd;
676 	struct thread *td = curthread;	/* XXX */
677 	Elf_Ehdr *hdr;
678 	Elf_Shdr *shdr;
679 	Elf_Sym *es;
680 	int nbytes, i, j;
681 	vm_offset_t mapbase;
682 	size_t mapsize;
683 	int error = 0;
684 	ssize_t resid;
685 	int flags;
686 	elf_file_t ef;
687 	linker_file_t lf;
688 	int symtabindex;
689 	int symstrindex;
690 	int shstrindex;
691 	int nsym;
692 	int pb, rl, ra;
693 	int alignmask;
694 
695 	shdr = NULL;
696 	lf = NULL;
697 	mapsize = 0;
698 	hdr = NULL;
699 
700 	nd = malloc(sizeof(struct nameidata), M_TEMP, M_WAITOK);
701 	NDINIT(nd, LOOKUP, FOLLOW, UIO_SYSSPACE, filename);
702 	flags = FREAD;
703 	error = vn_open(nd, &flags, 0, NULL);
704 	if (error) {
705 		free(nd, M_TEMP);
706 		return error;
707 	}
708 	NDFREE(nd, NDF_ONLY_PNBUF);
709 	if (nd->ni_vp->v_type != VREG) {
710 		error = ENOEXEC;
711 		goto out;
712 	}
713 #ifdef MAC
714 	error = mac_kld_check_load(td->td_ucred, nd->ni_vp);
715 	if (error) {
716 		goto out;
717 	}
718 #endif
719 
720 	/* Read the elf header from the file. */
721 	hdr = malloc(sizeof(*hdr), M_LINKER, M_WAITOK);
722 	error = vn_rdwr(UIO_READ, nd->ni_vp, (void *)hdr, sizeof(*hdr), 0,
723 	    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED,
724 	    &resid, td);
725 	if (error)
726 		goto out;
727 	if (resid != 0){
728 		error = ENOEXEC;
729 		goto out;
730 	}
731 
732 	if (!IS_ELF(*hdr)) {
733 		error = ENOEXEC;
734 		goto out;
735 	}
736 
737 	if (hdr->e_ident[EI_CLASS] != ELF_TARG_CLASS
738 	    || hdr->e_ident[EI_DATA] != ELF_TARG_DATA) {
739 		link_elf_error(filename, "Unsupported file layout");
740 		error = ENOEXEC;
741 		goto out;
742 	}
743 	if (hdr->e_ident[EI_VERSION] != EV_CURRENT
744 	    || hdr->e_version != EV_CURRENT) {
745 		link_elf_error(filename, "Unsupported file version");
746 		error = ENOEXEC;
747 		goto out;
748 	}
749 	if (hdr->e_type != ET_REL) {
750 		error = ENOSYS;
751 		goto out;
752 	}
753 	if (hdr->e_machine != ELF_TARG_MACH) {
754 		link_elf_error(filename, "Unsupported machine");
755 		error = ENOEXEC;
756 		goto out;
757 	}
758 
759 	lf = linker_make_file(filename, &link_elf_class);
760 	if (!lf) {
761 		error = ENOMEM;
762 		goto out;
763 	}
764 	ef = (elf_file_t) lf;
765 	ef->nprogtab = 0;
766 	ef->e_shdr = 0;
767 	ef->nreltab = 0;
768 	ef->nrelatab = 0;
769 
770 	/* Allocate and read in the section header */
771 	nbytes = hdr->e_shnum * hdr->e_shentsize;
772 	if (nbytes == 0 || hdr->e_shoff == 0 ||
773 	    hdr->e_shentsize != sizeof(Elf_Shdr)) {
774 		error = ENOEXEC;
775 		goto out;
776 	}
777 	shdr = malloc(nbytes, M_LINKER, M_WAITOK);
778 	ef->e_shdr = shdr;
779 	error = vn_rdwr(UIO_READ, nd->ni_vp, (caddr_t)shdr, nbytes,
780 	    hdr->e_shoff, UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred,
781 	    NOCRED, &resid, td);
782 	if (error)
783 		goto out;
784 	if (resid) {
785 		error = ENOEXEC;
786 		goto out;
787 	}
788 
789 	/* Scan the section header for information and table sizing. */
790 	nsym = 0;
791 	symtabindex = -1;
792 	symstrindex = -1;
793 	for (i = 0; i < hdr->e_shnum; i++) {
794 		if (shdr[i].sh_size == 0)
795 			continue;
796 		switch (shdr[i].sh_type) {
797 		case SHT_PROGBITS:
798 		case SHT_NOBITS:
799 #ifdef __amd64__
800 		case SHT_X86_64_UNWIND:
801 #endif
802 		case SHT_INIT_ARRAY:
803 		case SHT_FINI_ARRAY:
804 			if ((shdr[i].sh_flags & SHF_ALLOC) == 0)
805 				break;
806 			ef->nprogtab++;
807 			break;
808 		case SHT_SYMTAB:
809 			nsym++;
810 			symtabindex = i;
811 			symstrindex = shdr[i].sh_link;
812 			break;
813 		case SHT_REL:
814 			/*
815 			 * Ignore relocation tables for unallocated
816 			 * sections.
817 			 */
818 			if ((shdr[shdr[i].sh_info].sh_flags & SHF_ALLOC) == 0)
819 				break;
820 			ef->nreltab++;
821 			break;
822 		case SHT_RELA:
823 			if ((shdr[shdr[i].sh_info].sh_flags & SHF_ALLOC) == 0)
824 				break;
825 			ef->nrelatab++;
826 			break;
827 		case SHT_STRTAB:
828 			break;
829 		}
830 	}
831 	if (ef->nprogtab == 0) {
832 		link_elf_error(filename, "file has no contents");
833 		error = ENOEXEC;
834 		goto out;
835 	}
836 	if (nsym != 1) {
837 		/* Only allow one symbol table for now */
838 		link_elf_error(filename,
839 		    "file must have exactly one symbol table");
840 		error = ENOEXEC;
841 		goto out;
842 	}
843 	if (symstrindex < 0 || symstrindex > hdr->e_shnum ||
844 	    shdr[symstrindex].sh_type != SHT_STRTAB) {
845 		link_elf_error(filename, "file has invalid symbol strings");
846 		error = ENOEXEC;
847 		goto out;
848 	}
849 
850 	/* Allocate space for tracking the load chunks */
851 	if (ef->nprogtab != 0)
852 		ef->progtab = malloc(ef->nprogtab * sizeof(*ef->progtab),
853 		    M_LINKER, M_WAITOK | M_ZERO);
854 	if (ef->nreltab != 0)
855 		ef->reltab = malloc(ef->nreltab * sizeof(*ef->reltab),
856 		    M_LINKER, M_WAITOK | M_ZERO);
857 	if (ef->nrelatab != 0)
858 		ef->relatab = malloc(ef->nrelatab * sizeof(*ef->relatab),
859 		    M_LINKER, M_WAITOK | M_ZERO);
860 
861 	if (symtabindex == -1) {
862 		link_elf_error(filename, "lost symbol table index");
863 		error = ENOEXEC;
864 		goto out;
865 	}
866 	/* Allocate space for and load the symbol table */
867 	ef->ddbsymcnt = shdr[symtabindex].sh_size / sizeof(Elf_Sym);
868 	ef->ddbsymtab = malloc(shdr[symtabindex].sh_size, M_LINKER, M_WAITOK);
869 	error = vn_rdwr(UIO_READ, nd->ni_vp, (void *)ef->ddbsymtab,
870 	    shdr[symtabindex].sh_size, shdr[symtabindex].sh_offset,
871 	    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED,
872 	    &resid, td);
873 	if (error)
874 		goto out;
875 	if (resid != 0){
876 		error = EINVAL;
877 		goto out;
878 	}
879 
880 	/* Allocate space for and load the symbol strings */
881 	ef->ddbstrcnt = shdr[symstrindex].sh_size;
882 	ef->ddbstrtab = malloc(shdr[symstrindex].sh_size, M_LINKER, M_WAITOK);
883 	error = vn_rdwr(UIO_READ, nd->ni_vp, ef->ddbstrtab,
884 	    shdr[symstrindex].sh_size, shdr[symstrindex].sh_offset,
885 	    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED,
886 	    &resid, td);
887 	if (error)
888 		goto out;
889 	if (resid != 0){
890 		error = EINVAL;
891 		goto out;
892 	}
893 
894 	/* Do we have a string table for the section names?  */
895 	shstrindex = -1;
896 	if (hdr->e_shstrndx != 0 &&
897 	    shdr[hdr->e_shstrndx].sh_type == SHT_STRTAB) {
898 		shstrindex = hdr->e_shstrndx;
899 		ef->shstrcnt = shdr[shstrindex].sh_size;
900 		ef->shstrtab = malloc(shdr[shstrindex].sh_size, M_LINKER,
901 		    M_WAITOK);
902 		error = vn_rdwr(UIO_READ, nd->ni_vp, ef->shstrtab,
903 		    shdr[shstrindex].sh_size, shdr[shstrindex].sh_offset,
904 		    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED,
905 		    &resid, td);
906 		if (error)
907 			goto out;
908 		if (resid != 0){
909 			error = EINVAL;
910 			goto out;
911 		}
912 	}
913 
914 	/* Size up code/data(progbits) and bss(nobits). */
915 	alignmask = 0;
916 	for (i = 0; i < hdr->e_shnum; i++) {
917 		if (shdr[i].sh_size == 0)
918 			continue;
919 		switch (shdr[i].sh_type) {
920 		case SHT_PROGBITS:
921 		case SHT_NOBITS:
922 #ifdef __amd64__
923 		case SHT_X86_64_UNWIND:
924 #endif
925 		case SHT_INIT_ARRAY:
926 		case SHT_FINI_ARRAY:
927 			if ((shdr[i].sh_flags & SHF_ALLOC) == 0)
928 				break;
929 			alignmask = shdr[i].sh_addralign - 1;
930 			mapsize += alignmask;
931 			mapsize &= ~alignmask;
932 			mapsize += shdr[i].sh_size;
933 			break;
934 		}
935 	}
936 
937 	/*
938 	 * We know how much space we need for the text/data/bss/etc.
939 	 * This stuff needs to be in a single chunk so that profiling etc
940 	 * can get the bounds and gdb can associate offsets with modules
941 	 */
942 	ef->object = vm_pager_allocate(OBJT_PHYS, NULL, round_page(mapsize),
943 	    VM_PROT_ALL, 0, thread0.td_ucred);
944 	if (ef->object == NULL) {
945 		error = ENOMEM;
946 		goto out;
947 	}
948 #if VM_NRESERVLEVEL > 0
949 	vm_object_color(ef->object, 0);
950 #endif
951 
952 	/*
953 	 * In order to satisfy amd64's architectural requirements on the
954 	 * location of code and data in the kernel's address space, request a
955 	 * mapping that is above the kernel.
956 	 *
957 	 * Protections will be restricted once relocations are applied.
958 	 */
959 #ifdef __amd64__
960 	mapbase = KERNBASE;
961 #else
962 	mapbase = VM_MIN_KERNEL_ADDRESS;
963 #endif
964 	error = vm_map_find(kernel_map, ef->object, 0, &mapbase,
965 	    round_page(mapsize), 0, VMFS_OPTIMAL_SPACE, VM_PROT_ALL,
966 	    VM_PROT_ALL, 0);
967 	if (error != KERN_SUCCESS) {
968 		vm_object_deallocate(ef->object);
969 		ef->object = NULL;
970 		error = ENOMEM;
971 		goto out;
972 	}
973 
974 	/* Wire the pages */
975 	error = vm_map_wire(kernel_map, mapbase,
976 	    mapbase + round_page(mapsize),
977 	    VM_MAP_WIRE_SYSTEM|VM_MAP_WIRE_NOHOLES);
978 	if (error != KERN_SUCCESS) {
979 		error = ENOMEM;
980 		goto out;
981 	}
982 
983 	/* Inform the kld system about the situation */
984 	lf->address = ef->address = (caddr_t)mapbase;
985 	lf->size = mapsize;
986 
987 	/*
988 	 * Now load code/data(progbits), zero bss(nobits), allocate space for
989 	 * and load relocs
990 	 */
991 	pb = 0;
992 	rl = 0;
993 	ra = 0;
994 	alignmask = 0;
995 	for (i = 0; i < hdr->e_shnum; i++) {
996 		if (shdr[i].sh_size == 0)
997 			continue;
998 		switch (shdr[i].sh_type) {
999 		case SHT_PROGBITS:
1000 		case SHT_NOBITS:
1001 #ifdef __amd64__
1002 		case SHT_X86_64_UNWIND:
1003 #endif
1004 		case SHT_INIT_ARRAY:
1005 		case SHT_FINI_ARRAY:
1006 			if ((shdr[i].sh_flags & SHF_ALLOC) == 0)
1007 				break;
1008 			alignmask = shdr[i].sh_addralign - 1;
1009 			mapbase += alignmask;
1010 			mapbase &= ~alignmask;
1011 			if (ef->shstrtab != NULL && shdr[i].sh_name != 0) {
1012 				ef->progtab[pb].name =
1013 				    ef->shstrtab + shdr[i].sh_name;
1014 				if (!strcmp(ef->progtab[pb].name, ".ctors") ||
1015 				    shdr[i].sh_type == SHT_INIT_ARRAY) {
1016 					if (lf->ctors_addr != 0) {
1017 						printf(
1018 				    "%s: multiple ctor sections in %s\n",
1019 						    __func__, filename);
1020 					} else {
1021 						lf->ctors_addr =
1022 						    (caddr_t)mapbase;
1023 						lf->ctors_size =
1024 						    shdr[i].sh_size;
1025 					}
1026 				} else if (!strcmp(ef->progtab[pb].name,
1027 				    ".dtors") ||
1028 				    shdr[i].sh_type == SHT_FINI_ARRAY) {
1029 					if (lf->dtors_addr != 0) {
1030 						printf(
1031 				    "%s: multiple dtor sections in %s\n",
1032 						    __func__, filename);
1033 					} else {
1034 						lf->dtors_addr =
1035 						    (caddr_t)mapbase;
1036 						lf->dtors_size =
1037 						    shdr[i].sh_size;
1038 					}
1039 				}
1040 			} else if (shdr[i].sh_type == SHT_PROGBITS)
1041 				ef->progtab[pb].name = "<<PROGBITS>>";
1042 #ifdef __amd64__
1043 			else if (shdr[i].sh_type == SHT_X86_64_UNWIND)
1044 				ef->progtab[pb].name = "<<UNWIND>>";
1045 #endif
1046 			else
1047 				ef->progtab[pb].name = "<<NOBITS>>";
1048 			if (ef->progtab[pb].name != NULL &&
1049 			    !strcmp(ef->progtab[pb].name, DPCPU_SETNAME)) {
1050 				ef->progtab[pb].addr =
1051 				    dpcpu_alloc(shdr[i].sh_size);
1052 				if (ef->progtab[pb].addr == NULL) {
1053 					printf("%s: pcpu module space is out "
1054 					    "of space; cannot allocate %#jx "
1055 					    "for %s\n", __func__,
1056 					    (uintmax_t)shdr[i].sh_size,
1057 					    filename);
1058 				}
1059 			}
1060 #ifdef VIMAGE
1061 			else if (ef->progtab[pb].name != NULL &&
1062 			    !strcmp(ef->progtab[pb].name, VNET_SETNAME)) {
1063 				ef->progtab[pb].addr =
1064 				    vnet_data_alloc(shdr[i].sh_size);
1065 				if (ef->progtab[pb].addr == NULL) {
1066 					printf("%s: vnet module space is out "
1067 					    "of space; cannot allocate %#jx "
1068 					    "for %s\n", __func__,
1069 					    (uintmax_t)shdr[i].sh_size,
1070 					    filename);
1071 				}
1072 			}
1073 #endif
1074 			else
1075 				ef->progtab[pb].addr =
1076 				    (void *)(uintptr_t)mapbase;
1077 			if (ef->progtab[pb].addr == NULL) {
1078 				error = ENOSPC;
1079 				goto out;
1080 			}
1081 			ef->progtab[pb].size = shdr[i].sh_size;
1082 			ef->progtab[pb].flags = shdr[i].sh_flags;
1083 			ef->progtab[pb].sec = i;
1084 			if (shdr[i].sh_type == SHT_PROGBITS
1085 #ifdef __amd64__
1086 			    || shdr[i].sh_type == SHT_X86_64_UNWIND
1087 #endif
1088 			    ) {
1089 				error = vn_rdwr(UIO_READ, nd->ni_vp,
1090 				    ef->progtab[pb].addr,
1091 				    shdr[i].sh_size, shdr[i].sh_offset,
1092 				    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred,
1093 				    NOCRED, &resid, td);
1094 				if (error)
1095 					goto out;
1096 				if (resid != 0){
1097 					error = EINVAL;
1098 					goto out;
1099 				}
1100 				/* Initialize the per-cpu or vnet area. */
1101 				if (ef->progtab[pb].addr != (void *)mapbase &&
1102 				    !strcmp(ef->progtab[pb].name, DPCPU_SETNAME))
1103 					dpcpu_copy(ef->progtab[pb].addr,
1104 					    shdr[i].sh_size);
1105 #ifdef VIMAGE
1106 				else if (ef->progtab[pb].addr !=
1107 				    (void *)mapbase &&
1108 				    !strcmp(ef->progtab[pb].name, VNET_SETNAME))
1109 					vnet_data_copy(ef->progtab[pb].addr,
1110 					    shdr[i].sh_size);
1111 #endif
1112 			} else
1113 				bzero(ef->progtab[pb].addr, shdr[i].sh_size);
1114 
1115 			/* Update all symbol values with the offset. */
1116 			for (j = 0; j < ef->ddbsymcnt; j++) {
1117 				es = &ef->ddbsymtab[j];
1118 				if (es->st_shndx != i)
1119 					continue;
1120 				es->st_value += (Elf_Addr)ef->progtab[pb].addr;
1121 			}
1122 			mapbase += shdr[i].sh_size;
1123 			pb++;
1124 			break;
1125 		case SHT_REL:
1126 			if ((shdr[shdr[i].sh_info].sh_flags & SHF_ALLOC) == 0)
1127 				break;
1128 			ef->reltab[rl].rel = malloc(shdr[i].sh_size, M_LINKER,
1129 			    M_WAITOK);
1130 			ef->reltab[rl].nrel = shdr[i].sh_size / sizeof(Elf_Rel);
1131 			ef->reltab[rl].sec = shdr[i].sh_info;
1132 			error = vn_rdwr(UIO_READ, nd->ni_vp,
1133 			    (void *)ef->reltab[rl].rel,
1134 			    shdr[i].sh_size, shdr[i].sh_offset,
1135 			    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED,
1136 			    &resid, td);
1137 			if (error)
1138 				goto out;
1139 			if (resid != 0){
1140 				error = EINVAL;
1141 				goto out;
1142 			}
1143 			rl++;
1144 			break;
1145 		case SHT_RELA:
1146 			if ((shdr[shdr[i].sh_info].sh_flags & SHF_ALLOC) == 0)
1147 				break;
1148 			ef->relatab[ra].rela = malloc(shdr[i].sh_size, M_LINKER,
1149 			    M_WAITOK);
1150 			ef->relatab[ra].nrela =
1151 			    shdr[i].sh_size / sizeof(Elf_Rela);
1152 			ef->relatab[ra].sec = shdr[i].sh_info;
1153 			error = vn_rdwr(UIO_READ, nd->ni_vp,
1154 			    (void *)ef->relatab[ra].rela,
1155 			    shdr[i].sh_size, shdr[i].sh_offset,
1156 			    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED,
1157 			    &resid, td);
1158 			if (error)
1159 				goto out;
1160 			if (resid != 0){
1161 				error = EINVAL;
1162 				goto out;
1163 			}
1164 			ra++;
1165 			break;
1166 		}
1167 	}
1168 	if (pb != ef->nprogtab) {
1169 		link_elf_error(filename, "lost progbits");
1170 		error = ENOEXEC;
1171 		goto out;
1172 	}
1173 	if (rl != ef->nreltab) {
1174 		link_elf_error(filename, "lost reltab");
1175 		error = ENOEXEC;
1176 		goto out;
1177 	}
1178 	if (ra != ef->nrelatab) {
1179 		link_elf_error(filename, "lost relatab");
1180 		error = ENOEXEC;
1181 		goto out;
1182 	}
1183 	if (mapbase != (vm_offset_t)ef->address + mapsize) {
1184 		printf(
1185 		    "%s: mapbase 0x%lx != address %p + mapsize 0x%lx (0x%lx)\n",
1186 		    filename != NULL ? filename : "<none>",
1187 		    (u_long)mapbase, ef->address, (u_long)mapsize,
1188 		    (u_long)(vm_offset_t)ef->address + mapsize);
1189 		error = ENOMEM;
1190 		goto out;
1191 	}
1192 
1193 	/* Local intra-module relocations */
1194 	error = link_elf_reloc_local(lf, false);
1195 	if (error != 0)
1196 		goto out;
1197 
1198 	/* Pull in dependencies */
1199 	VOP_UNLOCK(nd->ni_vp);
1200 	error = linker_load_dependencies(lf);
1201 	vn_lock(nd->ni_vp, LK_EXCLUSIVE | LK_RETRY);
1202 	if (error)
1203 		goto out;
1204 
1205 	/* External relocations */
1206 	error = relocate_file(ef);
1207 	if (error)
1208 		goto out;
1209 
1210 	/* Notify MD code that a module is being loaded. */
1211 	error = elf_cpu_load_file(lf);
1212 	if (error)
1213 		goto out;
1214 
1215 #if defined(__i386__) || defined(__amd64__)
1216 	/* Now ifuncs. */
1217 	error = link_elf_reloc_local(lf, true);
1218 	if (error != 0)
1219 		goto out;
1220 #endif
1221 
1222 	link_elf_protect(ef);
1223 	link_elf_invoke_cbs(lf->ctors_addr, lf->ctors_size);
1224 	*result = lf;
1225 
1226 out:
1227 	VOP_UNLOCK(nd->ni_vp);
1228 	vn_close(nd->ni_vp, FREAD, td->td_ucred, td);
1229 	free(nd, M_TEMP);
1230 	if (error && lf)
1231 		linker_file_unload(lf, LINKER_UNLOAD_FORCE);
1232 	free(hdr, M_LINKER);
1233 
1234 	return error;
1235 }
1236 
1237 static void
1238 link_elf_unload_file(linker_file_t file)
1239 {
1240 	elf_file_t ef = (elf_file_t) file;
1241 	u_int i;
1242 
1243 	link_elf_invoke_cbs(file->dtors_addr, file->dtors_size);
1244 
1245 	/* Notify MD code that a module is being unloaded. */
1246 	elf_cpu_unload_file(file);
1247 
1248 	if (ef->progtab) {
1249 		for (i = 0; i < ef->nprogtab; i++) {
1250 			if (ef->progtab[i].size == 0)
1251 				continue;
1252 			if (ef->progtab[i].name == NULL)
1253 				continue;
1254 			if (!strcmp(ef->progtab[i].name, DPCPU_SETNAME))
1255 				dpcpu_free(ef->progtab[i].addr,
1256 				    ef->progtab[i].size);
1257 #ifdef VIMAGE
1258 			else if (!strcmp(ef->progtab[i].name, VNET_SETNAME))
1259 				vnet_data_free(ef->progtab[i].addr,
1260 				    ef->progtab[i].size);
1261 #endif
1262 		}
1263 	}
1264 	if (ef->preloaded) {
1265 		free(ef->reltab, M_LINKER);
1266 		free(ef->relatab, M_LINKER);
1267 		free(ef->progtab, M_LINKER);
1268 		free(ef->ctftab, M_LINKER);
1269 		free(ef->ctfoff, M_LINKER);
1270 		free(ef->typoff, M_LINKER);
1271 		if (file->pathname != NULL)
1272 			preload_delete_name(file->pathname);
1273 		return;
1274 	}
1275 
1276 	for (i = 0; i < ef->nreltab; i++)
1277 		free(ef->reltab[i].rel, M_LINKER);
1278 	for (i = 0; i < ef->nrelatab; i++)
1279 		free(ef->relatab[i].rela, M_LINKER);
1280 	free(ef->reltab, M_LINKER);
1281 	free(ef->relatab, M_LINKER);
1282 	free(ef->progtab, M_LINKER);
1283 
1284 	if (ef->object != NULL)
1285 		vm_map_remove(kernel_map, (vm_offset_t)ef->address,
1286 		    (vm_offset_t)ef->address + ptoa(ef->object->size));
1287 	free(ef->e_shdr, M_LINKER);
1288 	free(ef->ddbsymtab, M_LINKER);
1289 	free(ef->ddbstrtab, M_LINKER);
1290 	free(ef->shstrtab, M_LINKER);
1291 	free(ef->ctftab, M_LINKER);
1292 	free(ef->ctfoff, M_LINKER);
1293 	free(ef->typoff, M_LINKER);
1294 }
1295 
1296 static const char *
1297 symbol_name(elf_file_t ef, Elf_Size r_info)
1298 {
1299 	const Elf_Sym *ref;
1300 
1301 	if (ELF_R_SYM(r_info)) {
1302 		ref = ef->ddbsymtab + ELF_R_SYM(r_info);
1303 		return ef->ddbstrtab + ref->st_name;
1304 	} else
1305 		return NULL;
1306 }
1307 
1308 static Elf_Addr
1309 findbase(elf_file_t ef, int sec)
1310 {
1311 	int i;
1312 	Elf_Addr base = 0;
1313 
1314 	for (i = 0; i < ef->nprogtab; i++) {
1315 		if (sec == ef->progtab[i].sec) {
1316 			base = (Elf_Addr)ef->progtab[i].addr;
1317 			break;
1318 		}
1319 	}
1320 	return base;
1321 }
1322 
1323 static int
1324 relocate_file1(elf_file_t ef, bool ifuncs)
1325 {
1326 	const Elf_Rel *rellim;
1327 	const Elf_Rel *rel;
1328 	const Elf_Rela *relalim;
1329 	const Elf_Rela *rela;
1330 	const char *symname;
1331 	const Elf_Sym *sym;
1332 	int i;
1333 	Elf_Size symidx;
1334 	Elf_Addr base;
1335 
1336 	/* Perform relocations without addend if there are any: */
1337 	for (i = 0; i < ef->nreltab; i++) {
1338 		rel = ef->reltab[i].rel;
1339 		if (rel == NULL) {
1340 			link_elf_error(ef->lf.filename, "lost a reltab!");
1341 			return (ENOEXEC);
1342 		}
1343 		rellim = rel + ef->reltab[i].nrel;
1344 		base = findbase(ef, ef->reltab[i].sec);
1345 		if (base == 0) {
1346 			link_elf_error(ef->lf.filename, "lost base for reltab");
1347 			return (ENOEXEC);
1348 		}
1349 		for ( ; rel < rellim; rel++) {
1350 			symidx = ELF_R_SYM(rel->r_info);
1351 			if (symidx >= ef->ddbsymcnt)
1352 				continue;
1353 			sym = ef->ddbsymtab + symidx;
1354 			/* Local relocs are already done */
1355 			if (ELF_ST_BIND(sym->st_info) == STB_LOCAL)
1356 				continue;
1357 			if ((ELF_ST_TYPE(sym->st_info) == STT_GNU_IFUNC ||
1358 			    elf_is_ifunc_reloc(rel->r_info)) != ifuncs)
1359 				continue;
1360 			if (elf_reloc(&ef->lf, base, rel, ELF_RELOC_REL,
1361 			    elf_obj_lookup)) {
1362 				symname = symbol_name(ef, rel->r_info);
1363 				printf("link_elf_obj: symbol %s undefined\n",
1364 				    symname);
1365 				return (ENOENT);
1366 			}
1367 		}
1368 	}
1369 
1370 	/* Perform relocations with addend if there are any: */
1371 	for (i = 0; i < ef->nrelatab; i++) {
1372 		rela = ef->relatab[i].rela;
1373 		if (rela == NULL) {
1374 			link_elf_error(ef->lf.filename, "lost a relatab!");
1375 			return (ENOEXEC);
1376 		}
1377 		relalim = rela + ef->relatab[i].nrela;
1378 		base = findbase(ef, ef->relatab[i].sec);
1379 		if (base == 0) {
1380 			link_elf_error(ef->lf.filename,
1381 			    "lost base for relatab");
1382 			return (ENOEXEC);
1383 		}
1384 		for ( ; rela < relalim; rela++) {
1385 			symidx = ELF_R_SYM(rela->r_info);
1386 			if (symidx >= ef->ddbsymcnt)
1387 				continue;
1388 			sym = ef->ddbsymtab + symidx;
1389 			/* Local relocs are already done */
1390 			if (ELF_ST_BIND(sym->st_info) == STB_LOCAL)
1391 				continue;
1392 			if ((ELF_ST_TYPE(sym->st_info) == STT_GNU_IFUNC ||
1393 			    elf_is_ifunc_reloc(rela->r_info)) != ifuncs)
1394 				continue;
1395 			if (elf_reloc(&ef->lf, base, rela, ELF_RELOC_RELA,
1396 			    elf_obj_lookup)) {
1397 				symname = symbol_name(ef, rela->r_info);
1398 				printf("link_elf_obj: symbol %s undefined\n",
1399 				    symname);
1400 				return (ENOENT);
1401 			}
1402 		}
1403 	}
1404 
1405 	/*
1406 	 * Only clean SHN_FBSD_CACHED for successful return.  If we
1407 	 * modified symbol table for the object but found an
1408 	 * unresolved symbol, there is no reason to roll back.
1409 	 */
1410 	elf_obj_cleanup_globals_cache(ef);
1411 
1412 	return (0);
1413 }
1414 
1415 static int
1416 relocate_file(elf_file_t ef)
1417 {
1418 	int error;
1419 
1420 	error = relocate_file1(ef, false);
1421 	if (error == 0)
1422 		error = relocate_file1(ef, true);
1423 	return (error);
1424 }
1425 
1426 static int
1427 link_elf_lookup_symbol(linker_file_t lf, const char *name, c_linker_sym_t *sym)
1428 {
1429 	elf_file_t ef = (elf_file_t) lf;
1430 	const Elf_Sym *symp;
1431 	const char *strp;
1432 	int i;
1433 
1434 	for (i = 0, symp = ef->ddbsymtab; i < ef->ddbsymcnt; i++, symp++) {
1435 		strp = ef->ddbstrtab + symp->st_name;
1436 		if (symp->st_shndx != SHN_UNDEF && strcmp(name, strp) == 0) {
1437 			*sym = (c_linker_sym_t) symp;
1438 			return 0;
1439 		}
1440 	}
1441 	return (ENOENT);
1442 }
1443 
1444 static int
1445 link_elf_symbol_values(linker_file_t lf, c_linker_sym_t sym,
1446     linker_symval_t *symval)
1447 {
1448 	elf_file_t ef;
1449 	const Elf_Sym *es;
1450 	caddr_t val;
1451 
1452 	ef = (elf_file_t) lf;
1453 	es = (const Elf_Sym*) sym;
1454 	val = (caddr_t)es->st_value;
1455 	if (es >= ef->ddbsymtab && es < (ef->ddbsymtab + ef->ddbsymcnt)) {
1456 		symval->name = ef->ddbstrtab + es->st_name;
1457 		val = (caddr_t)es->st_value;
1458 		if (ELF_ST_TYPE(es->st_info) == STT_GNU_IFUNC)
1459 			val = ((caddr_t (*)(void))val)();
1460 		symval->value = val;
1461 		symval->size = es->st_size;
1462 		return (0);
1463 	}
1464 	return (ENOENT);
1465 }
1466 
1467 static int
1468 link_elf_search_symbol(linker_file_t lf, caddr_t value,
1469     c_linker_sym_t *sym, long *diffp)
1470 {
1471 	elf_file_t ef = (elf_file_t)lf;
1472 	u_long off = (uintptr_t)(void *)value;
1473 	u_long diff = off;
1474 	u_long st_value;
1475 	const Elf_Sym *es;
1476 	const Elf_Sym *best = NULL;
1477 	int i;
1478 
1479 	for (i = 0, es = ef->ddbsymtab; i < ef->ddbsymcnt; i++, es++) {
1480 		if (es->st_name == 0)
1481 			continue;
1482 		st_value = es->st_value;
1483 		if (off >= st_value) {
1484 			if (off - st_value < diff) {
1485 				diff = off - st_value;
1486 				best = es;
1487 				if (diff == 0)
1488 					break;
1489 			} else if (off - st_value == diff) {
1490 				best = es;
1491 			}
1492 		}
1493 	}
1494 	if (best == NULL)
1495 		*diffp = off;
1496 	else
1497 		*diffp = diff;
1498 	*sym = (c_linker_sym_t) best;
1499 
1500 	return (0);
1501 }
1502 
1503 /*
1504  * Look up a linker set on an ELF system.
1505  */
1506 static int
1507 link_elf_lookup_set(linker_file_t lf, const char *name,
1508     void ***startp, void ***stopp, int *countp)
1509 {
1510 	elf_file_t ef = (elf_file_t)lf;
1511 	void **start, **stop;
1512 	int i, count;
1513 
1514 	/* Relative to section number */
1515 	for (i = 0; i < ef->nprogtab; i++) {
1516 		if ((strncmp(ef->progtab[i].name, "set_", 4) == 0) &&
1517 		    strcmp(ef->progtab[i].name + 4, name) == 0) {
1518 			start  = (void **)ef->progtab[i].addr;
1519 			stop = (void **)((char *)ef->progtab[i].addr +
1520 			    ef->progtab[i].size);
1521 			count = stop - start;
1522 			if (startp)
1523 				*startp = start;
1524 			if (stopp)
1525 				*stopp = stop;
1526 			if (countp)
1527 				*countp = count;
1528 			return (0);
1529 		}
1530 	}
1531 	return (ESRCH);
1532 }
1533 
1534 static int
1535 link_elf_each_function_name(linker_file_t file,
1536     int (*callback)(const char *, void *), void *opaque)
1537 {
1538 	elf_file_t ef = (elf_file_t)file;
1539 	const Elf_Sym *symp;
1540 	int i, error;
1541 
1542 	/* Exhaustive search */
1543 	for (i = 0, symp = ef->ddbsymtab; i < ef->ddbsymcnt; i++, symp++) {
1544 		if (symp->st_value != 0 &&
1545 		    (ELF_ST_TYPE(symp->st_info) == STT_FUNC ||
1546 		    ELF_ST_TYPE(symp->st_info) == STT_GNU_IFUNC)) {
1547 			error = callback(ef->ddbstrtab + symp->st_name, opaque);
1548 			if (error)
1549 				return (error);
1550 		}
1551 	}
1552 	return (0);
1553 }
1554 
1555 static int
1556 link_elf_each_function_nameval(linker_file_t file,
1557     linker_function_nameval_callback_t callback, void *opaque)
1558 {
1559 	linker_symval_t symval;
1560 	elf_file_t ef = (elf_file_t)file;
1561 	const Elf_Sym* symp;
1562 	int i, error;
1563 
1564 	/* Exhaustive search */
1565 	for (i = 0, symp = ef->ddbsymtab; i < ef->ddbsymcnt; i++, symp++) {
1566 		if (symp->st_value != 0 &&
1567 		    (ELF_ST_TYPE(symp->st_info) == STT_FUNC ||
1568 		    ELF_ST_TYPE(symp->st_info) == STT_GNU_IFUNC)) {
1569 			error = link_elf_symbol_values(file,
1570 			    (c_linker_sym_t)symp, &symval);
1571 			if (error)
1572 				return (error);
1573 			error = callback(file, i, &symval, opaque);
1574 			if (error)
1575 				return (error);
1576 		}
1577 	}
1578 	return (0);
1579 }
1580 
1581 static void
1582 elf_obj_cleanup_globals_cache(elf_file_t ef)
1583 {
1584 	Elf_Sym *sym;
1585 	Elf_Size i;
1586 
1587 	for (i = 0; i < ef->ddbsymcnt; i++) {
1588 		sym = ef->ddbsymtab + i;
1589 		if (sym->st_shndx == SHN_FBSD_CACHED) {
1590 			sym->st_shndx = SHN_UNDEF;
1591 			sym->st_value = 0;
1592 		}
1593 	}
1594 }
1595 
1596 /*
1597  * Symbol lookup function that can be used when the symbol index is known (ie
1598  * in relocations). It uses the symbol index instead of doing a fully fledged
1599  * hash table based lookup when such is valid. For example for local symbols.
1600  * This is not only more efficient, it's also more correct. It's not always
1601  * the case that the symbol can be found through the hash table.
1602  */
1603 static int
1604 elf_obj_lookup(linker_file_t lf, Elf_Size symidx, int deps, Elf_Addr *res)
1605 {
1606 	elf_file_t ef = (elf_file_t)lf;
1607 	Elf_Sym *sym;
1608 	const char *symbol;
1609 	Elf_Addr res1;
1610 
1611 	/* Don't even try to lookup the symbol if the index is bogus. */
1612 	if (symidx >= ef->ddbsymcnt) {
1613 		*res = 0;
1614 		return (EINVAL);
1615 	}
1616 
1617 	sym = ef->ddbsymtab + symidx;
1618 
1619 	/* Quick answer if there is a definition included. */
1620 	if (sym->st_shndx != SHN_UNDEF) {
1621 		res1 = (Elf_Addr)sym->st_value;
1622 		if (ELF_ST_TYPE(sym->st_info) == STT_GNU_IFUNC)
1623 			res1 = ((Elf_Addr (*)(void))res1)();
1624 		*res = res1;
1625 		return (0);
1626 	}
1627 
1628 	/* If we get here, then it is undefined and needs a lookup. */
1629 	switch (ELF_ST_BIND(sym->st_info)) {
1630 	case STB_LOCAL:
1631 		/* Local, but undefined? huh? */
1632 		*res = 0;
1633 		return (EINVAL);
1634 
1635 	case STB_GLOBAL:
1636 	case STB_WEAK:
1637 		/* Relative to Data or Function name */
1638 		symbol = ef->ddbstrtab + sym->st_name;
1639 
1640 		/* Force a lookup failure if the symbol name is bogus. */
1641 		if (*symbol == 0) {
1642 			*res = 0;
1643 			return (EINVAL);
1644 		}
1645 		res1 = (Elf_Addr)linker_file_lookup_symbol(lf, symbol, deps);
1646 
1647 		/*
1648 		 * Cache global lookups during module relocation. The failure
1649 		 * case is particularly expensive for callers, who must scan
1650 		 * through the entire globals table doing strcmp(). Cache to
1651 		 * avoid doing such work repeatedly.
1652 		 *
1653 		 * After relocation is complete, undefined globals will be
1654 		 * restored to SHN_UNDEF in elf_obj_cleanup_globals_cache(),
1655 		 * above.
1656 		 */
1657 		if (res1 != 0) {
1658 			sym->st_shndx = SHN_FBSD_CACHED;
1659 			sym->st_value = res1;
1660 			*res = res1;
1661 			return (0);
1662 		} else if (ELF_ST_BIND(sym->st_info) == STB_WEAK) {
1663 			sym->st_value = 0;
1664 			*res = 0;
1665 			return (0);
1666 		}
1667 		return (EINVAL);
1668 
1669 	default:
1670 		return (EINVAL);
1671 	}
1672 }
1673 
1674 static void
1675 link_elf_fix_link_set(elf_file_t ef)
1676 {
1677 	static const char startn[] = "__start_";
1678 	static const char stopn[] = "__stop_";
1679 	Elf_Sym *sym;
1680 	const char *sym_name, *linkset_name;
1681 	Elf_Addr startp, stopp;
1682 	Elf_Size symidx;
1683 	int start, i;
1684 
1685 	startp = stopp = 0;
1686 	for (symidx = 1 /* zero entry is special */;
1687 		symidx < ef->ddbsymcnt; symidx++) {
1688 		sym = ef->ddbsymtab + symidx;
1689 		if (sym->st_shndx != SHN_UNDEF)
1690 			continue;
1691 
1692 		sym_name = ef->ddbstrtab + sym->st_name;
1693 		if (strncmp(sym_name, startn, sizeof(startn) - 1) == 0) {
1694 			start = 1;
1695 			linkset_name = sym_name + sizeof(startn) - 1;
1696 		}
1697 		else if (strncmp(sym_name, stopn, sizeof(stopn) - 1) == 0) {
1698 			start = 0;
1699 			linkset_name = sym_name + sizeof(stopn) - 1;
1700 		}
1701 		else
1702 			continue;
1703 
1704 		for (i = 0; i < ef->nprogtab; i++) {
1705 			if (strcmp(ef->progtab[i].name, linkset_name) == 0) {
1706 				startp = (Elf_Addr)ef->progtab[i].addr;
1707 				stopp = (Elf_Addr)(startp + ef->progtab[i].size);
1708 				break;
1709 			}
1710 		}
1711 		if (i == ef->nprogtab)
1712 			continue;
1713 
1714 		sym->st_value = start ? startp : stopp;
1715 		sym->st_shndx = i;
1716 	}
1717 }
1718 
1719 static int
1720 link_elf_reloc_local(linker_file_t lf, bool ifuncs)
1721 {
1722 	elf_file_t ef = (elf_file_t)lf;
1723 	const Elf_Rel *rellim;
1724 	const Elf_Rel *rel;
1725 	const Elf_Rela *relalim;
1726 	const Elf_Rela *rela;
1727 	const Elf_Sym *sym;
1728 	Elf_Addr base;
1729 	int i;
1730 	Elf_Size symidx;
1731 
1732 	link_elf_fix_link_set(ef);
1733 
1734 	/* Perform relocations without addend if there are any: */
1735 	for (i = 0; i < ef->nreltab; i++) {
1736 		rel = ef->reltab[i].rel;
1737 		if (rel == NULL) {
1738 			link_elf_error(ef->lf.filename, "lost a reltab");
1739 			return (ENOEXEC);
1740 		}
1741 		rellim = rel + ef->reltab[i].nrel;
1742 		base = findbase(ef, ef->reltab[i].sec);
1743 		if (base == 0) {
1744 			link_elf_error(ef->lf.filename, "lost base for reltab");
1745 			return (ENOEXEC);
1746 		}
1747 		for ( ; rel < rellim; rel++) {
1748 			symidx = ELF_R_SYM(rel->r_info);
1749 			if (symidx >= ef->ddbsymcnt)
1750 				continue;
1751 			sym = ef->ddbsymtab + symidx;
1752 			/* Only do local relocs */
1753 			if (ELF_ST_BIND(sym->st_info) != STB_LOCAL)
1754 				continue;
1755 			if ((ELF_ST_TYPE(sym->st_info) == STT_GNU_IFUNC ||
1756 			    elf_is_ifunc_reloc(rel->r_info)) != ifuncs)
1757 				continue;
1758 			if (elf_reloc_local(lf, base, rel, ELF_RELOC_REL,
1759 			    elf_obj_lookup) != 0)
1760 				return (ENOEXEC);
1761 		}
1762 	}
1763 
1764 	/* Perform relocations with addend if there are any: */
1765 	for (i = 0; i < ef->nrelatab; i++) {
1766 		rela = ef->relatab[i].rela;
1767 		if (rela == NULL) {
1768 			link_elf_error(ef->lf.filename, "lost a relatab!");
1769 			return (ENOEXEC);
1770 		}
1771 		relalim = rela + ef->relatab[i].nrela;
1772 		base = findbase(ef, ef->relatab[i].sec);
1773 		if (base == 0) {
1774 			link_elf_error(ef->lf.filename, "lost base for reltab");
1775 			return (ENOEXEC);
1776 		}
1777 		for ( ; rela < relalim; rela++) {
1778 			symidx = ELF_R_SYM(rela->r_info);
1779 			if (symidx >= ef->ddbsymcnt)
1780 				continue;
1781 			sym = ef->ddbsymtab + symidx;
1782 			/* Only do local relocs */
1783 			if (ELF_ST_BIND(sym->st_info) != STB_LOCAL)
1784 				continue;
1785 			if ((ELF_ST_TYPE(sym->st_info) == STT_GNU_IFUNC ||
1786 			    elf_is_ifunc_reloc(rela->r_info)) != ifuncs)
1787 				continue;
1788 			if (elf_reloc_local(lf, base, rela, ELF_RELOC_RELA,
1789 			    elf_obj_lookup) != 0)
1790 				return (ENOEXEC);
1791 		}
1792 	}
1793 	return (0);
1794 }
1795 
1796 static long
1797 link_elf_symtab_get(linker_file_t lf, const Elf_Sym **symtab)
1798 {
1799 	elf_file_t ef = (elf_file_t)lf;
1800 
1801 	*symtab = ef->ddbsymtab;
1802 	if (*symtab == NULL)
1803 		return (0);
1804 	return (ef->ddbsymcnt);
1805 }
1806 
1807 static long
1808 link_elf_strtab_get(linker_file_t lf, caddr_t *strtab)
1809 {
1810 	elf_file_t ef = (elf_file_t)lf;
1811 
1812 	*strtab = ef->ddbstrtab;
1813 	if (*strtab == NULL)
1814 		return (0);
1815 	return (ef->ddbstrcnt);
1816 }
1817