xref: /freebsd/sys/dev/ksyms/ksyms.c (revision 35c0a8c449fd2b7f75029ebed5e10852240f0865)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2008-2009, Stacey Son <sson@freebsd.org>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/kernel.h>
32 
33 #include <sys/conf.h>
34 #include <sys/elf.h>
35 #include <sys/linker.h>
36 #include <sys/malloc.h>
37 #include <sys/mman.h>
38 #include <sys/module.h>
39 #include <sys/proc.h>
40 #include <sys/queue.h>
41 #include <sys/resourcevar.h>
42 #include <sys/rwlock.h>
43 #include <sys/stat.h>
44 #include <sys/sx.h>
45 #include <sys/uio.h>
46 
47 #include <machine/elf.h>
48 
49 #include <vm/pmap.h>
50 #include <vm/vm.h>
51 #include <vm/vm_extern.h>
52 #include <vm/vm_object.h>
53 #include <vm/vm_page.h>
54 #include <vm/vm_pager.h>
55 
56 #include "linker_if.h"
57 
/* Indices into ksyms_hdr.kh_shdr[] for the section headers of a snapshot. */
#define SHDR_NULL	0
#define SHDR_SYMTAB	1
#define SHDR_STRTAB	2
#define SHDR_SHSTRTAB	3

/* Number of section headers; sizes kh_shdr[] and is stored in e_shnum. */
#define SHDR_NUM	4

/* Section names, concatenated into ksyms_shstrtab. */
#define STR_SYMTAB	".symtab"
#define STR_STRTAB	".strtab"
#define STR_SHSTRTAB	".shstrtab"

/* Name used for both the cdevsw entry and the device node. */
#define KSYMS_DNAME	"ksyms"
70 
/* Character-device entry points implemented later in this file. */
static d_open_t ksyms_open;
static d_read_t ksyms_read;
static d_mmap_single_t ksyms_mmap_single;

/*
 * Device switch for the ksyms device: only open, read and mmap_single are
 * provided; no other handlers are set here.
 */
static struct cdevsw ksyms_cdevsw = {
	.d_version =	D_VERSION,
	.d_flags =	0,
	.d_open =	ksyms_open,
	.d_read =	ksyms_read,
	.d_mmap_single = ksyms_mmap_single,
	.d_name =	KSYMS_DNAME
};
83 
/*
 * Per-open state.  One instance is allocated in ksyms_open(), attached to
 * the open file with devfs_set_cdevpriv() and released by
 * ksyms_cdevpriv_dtr().
 */
struct ksyms_softc {
	LIST_ENTRY(ksyms_softc)	sc_list;	/* linkage on ksyms_list */
	vm_offset_t		sc_uaddr;	/* not referenced by the code in this file */
	size_t			sc_usize;	/* not referenced by the code in this file */
	vm_object_t		sc_obj;		/* VM object holding the snapshot, or NULL */
	vm_size_t		sc_objsz;	/* snapshot size in bytes (not page-rounded) */
	struct proc	       *sc_proc;	/* process that performed the open */
};
92 
/* sx lock taken exclusively around every ksyms_list traversal/update. */
static struct sx		 ksyms_mtx;
/* Device node created at MOD_LOAD and destroyed at MOD_UNLOAD. */
static struct cdev		*ksyms_dev;
/* All currently open instances; used to enforce one open per process. */
static LIST_HEAD(, ksyms_softc)	 ksyms_list = LIST_HEAD_INITIALIZER(ksyms_list);

/*
 * Section-header string table image: a leading NUL followed by the three
 * NUL-terminated section names.  The sh_name offsets computed in
 * ksyms_snapshot() depend on this exact order.
 */
static const char	ksyms_shstrtab[] =
	"\0" STR_SYMTAB "\0" STR_STRTAB "\0" STR_SHSTRTAB "\0";
99 
/*
 * Fixed-size header written at offset 0 of every snapshot.  e_phoff,
 * e_shoff and the shstrtab sh_offset are derived with offsetof() on this
 * structure, and the symbol table starts at sizeof(struct ksyms_hdr), so the
 * member order and sizes define the layout of the emitted image.
 */
struct ksyms_hdr {
	Elf_Ehdr	kh_ehdr;		/* ELF file header */
	Elf_Phdr	kh_txtphdr;		/* "text" program header */
	Elf_Phdr	kh_datphdr;		/* "data" program header */
	Elf_Shdr	kh_shdr[SHDR_NUM];	/* null, symtab, strtab, shstrtab */
	char		kh_shstrtab[sizeof(ksyms_shstrtab)]; /* copy of ksyms_shstrtab */
};
107 
/* Running totals accumulated over all linker files by ksyms_size_calc(). */
struct tsizes {
	size_t		ts_symsz;	/* total symbol table size, in bytes */
	size_t		ts_strsz;	/* total string table size, in bytes */
};
112 
/*
 * Cursor state threaded through ksyms_add() while the per-file tables are
 * appended to the snapshot.
 */
struct toffsets {
	struct ksyms_softc *to_sc;	/* instance whose VM object is being filled */
	vm_offset_t	to_symoff;	/* offset in the image of the next symbol chunk */
	vm_offset_t	to_stroff;	/* offset in the image of the next string table */
	unsigned	to_stridx;	/* string bytes emitted so far; added to st_name */
	size_t		to_resid;	/* bytes still expected before the image is full */
};
120 
/* malloc(9) type used for every allocation made by this driver. */
static MALLOC_DEFINE(M_KSYMS, "KSYMS", "Kernel Symbol Table");
122 
123 /*
124  * Get the symbol and string table sizes for a kernel module. Add it to the
125  * running total.
126  */
127 static int
128 ksyms_size_permod(linker_file_t lf, void *arg)
129 {
130 	struct tsizes *ts;
131 	const Elf_Sym *symtab;
132 	caddr_t strtab;
133 	long syms;
134 
135 	ts = arg;
136 
137 	syms = LINKER_SYMTAB_GET(lf, &symtab);
138 	ts->ts_symsz += syms * sizeof(Elf_Sym);
139 	ts->ts_strsz += LINKER_STRTAB_GET(lf, &strtab);
140 
141 	return (0);
142 }
143 
144 /*
145  * For kernel module get the symbol and string table sizes, returning the
146  * totals in *ts.
147  */
148 static void
149 ksyms_size_calc(struct tsizes *ts)
150 {
151 
152 	ts->ts_symsz = 0;
153 	ts->ts_strsz = 0;
154 
155 	(void)linker_file_foreach(ksyms_size_permod, ts);
156 }
157 
158 static int
159 ksyms_emit(struct ksyms_softc *sc, void *buf, off_t off, size_t sz)
160 {
161 	struct iovec iov;
162 	struct uio uio;
163 
164 	iov.iov_base = buf;
165 	iov.iov_len = sz;
166 	uio.uio_iov = &iov;
167 	uio.uio_iovcnt = 1;
168 	uio.uio_offset = off;
169 	uio.uio_resid = (ssize_t)sz;
170 	uio.uio_segflg = UIO_SYSSPACE;
171 	uio.uio_rw = UIO_WRITE;
172 	uio.uio_td = curthread;
173 
174 	return (uiomove_object(sc->sc_obj, sc->sc_objsz, &uio));
175 }
176 
177 #define SYMBLKSZ	(256 * sizeof(Elf_Sym))
178 
179 /*
180  * For a kernel module, add the symbol and string tables into the
181  * snapshot buffer.  Fix up the offsets in the tables.
182  */
183 static int
184 ksyms_add(linker_file_t lf, void *arg)
185 {
186 	char *buf;
187 	struct ksyms_softc *sc;
188 	struct toffsets *to;
189 	const Elf_Sym *symtab;
190 	Elf_Sym *symp;
191 	caddr_t strtab;
192 	size_t len, numsyms, strsz, symsz;
193 	linker_symval_t symval;
194 	int error, i, nsyms;
195 	bool fixup;
196 
197 	buf = malloc(SYMBLKSZ, M_KSYMS, M_WAITOK);
198 	to = arg;
199 	sc = to->to_sc;
200 
201 	MOD_SLOCK;
202 	numsyms =  LINKER_SYMTAB_GET(lf, &symtab);
203 	strsz = LINKER_STRTAB_GET(lf, &strtab);
204 	symsz = numsyms * sizeof(Elf_Sym);
205 
206 #ifdef RELOCATABLE_KERNEL
207 	fixup = true;
208 #else
209 	fixup = lf->id > 1;
210 #endif
211 
212 	while (symsz > 0) {
213 		len = min(SYMBLKSZ, symsz);
214 		bcopy(symtab, buf, len);
215 
216 		/*
217 		 * Fix up symbol table for kernel modules:
218 		 *   string offsets need adjusted
219 		 *   symbol values made absolute
220 		 */
221 		symp = (Elf_Sym *) buf;
222 		nsyms = len / sizeof(Elf_Sym);
223 		for (i = 0; i < nsyms; i++) {
224 			symp[i].st_name += to->to_stridx;
225 			if (fixup && LINKER_SYMBOL_VALUES(lf,
226 			    (c_linker_sym_t)&symtab[i], &symval) == 0) {
227 				symp[i].st_value = (uintptr_t)symval.value;
228 			}
229 		}
230 
231 		if (len > to->to_resid) {
232 			MOD_SUNLOCK;
233 			free(buf, M_KSYMS);
234 			return (ENXIO);
235 		}
236 		to->to_resid -= len;
237 		error = ksyms_emit(sc, buf, to->to_symoff, len);
238 		to->to_symoff += len;
239 		if (error != 0) {
240 			MOD_SUNLOCK;
241 			free(buf, M_KSYMS);
242 			return (error);
243 		}
244 
245 		symtab += nsyms;
246 		symsz -= len;
247 	}
248 	free(buf, M_KSYMS);
249 	MOD_SUNLOCK;
250 
251 	if (strsz > to->to_resid)
252 		return (ENXIO);
253 	to->to_resid -= strsz;
254 	error = ksyms_emit(sc, strtab, to->to_stroff, strsz);
255 	to->to_stroff += strsz;
256 	to->to_stridx += strsz;
257 
258 	return (error);
259 }
260 
261 /*
262  * Create a single ELF symbol table for the kernel and kernel modules loaded
263  * at this time. Write this snapshot out in the process address space. Return
264  * 0 on success, otherwise error.
265  */
266 static int
267 ksyms_snapshot(struct ksyms_softc *sc, struct tsizes *ts)
268 {
269 	struct toffsets	to;
270 	struct ksyms_hdr *hdr;
271 	int error;
272 
273 	hdr = malloc(sizeof(*hdr), M_KSYMS, M_WAITOK | M_ZERO);
274 
275 	/*
276 	 * Create the ELF header.
277 	 */
278 	hdr->kh_ehdr.e_ident[EI_PAD] = 0;
279 	hdr->kh_ehdr.e_ident[EI_MAG0] = ELFMAG0;
280 	hdr->kh_ehdr.e_ident[EI_MAG1] = ELFMAG1;
281 	hdr->kh_ehdr.e_ident[EI_MAG2] = ELFMAG2;
282 	hdr->kh_ehdr.e_ident[EI_MAG3] = ELFMAG3;
283 	hdr->kh_ehdr.e_ident[EI_DATA] = ELF_DATA;
284 	hdr->kh_ehdr.e_ident[EI_OSABI] = ELFOSABI_FREEBSD;
285 	hdr->kh_ehdr.e_ident[EI_CLASS] = ELF_CLASS;
286 	hdr->kh_ehdr.e_ident[EI_VERSION] = EV_CURRENT;
287 	hdr->kh_ehdr.e_ident[EI_ABIVERSION] = 0;
288 	hdr->kh_ehdr.e_type = ET_EXEC;
289 	hdr->kh_ehdr.e_machine = ELF_ARCH;
290 	hdr->kh_ehdr.e_version = EV_CURRENT;
291 	hdr->kh_ehdr.e_entry = 0;
292 	hdr->kh_ehdr.e_phoff = offsetof(struct ksyms_hdr, kh_txtphdr);
293 	hdr->kh_ehdr.e_shoff = offsetof(struct ksyms_hdr, kh_shdr);
294 	hdr->kh_ehdr.e_flags = 0;
295 	hdr->kh_ehdr.e_ehsize = sizeof(Elf_Ehdr);
296 	hdr->kh_ehdr.e_phentsize = sizeof(Elf_Phdr);
297 	hdr->kh_ehdr.e_phnum = 2;	/* Text and Data */
298 	hdr->kh_ehdr.e_shentsize = sizeof(Elf_Shdr);
299 	hdr->kh_ehdr.e_shnum = SHDR_NUM;
300 	hdr->kh_ehdr.e_shstrndx = SHDR_SHSTRTAB;
301 
302 	/*
303 	 * Add both the text and data program headers.
304 	 */
305 	hdr->kh_txtphdr.p_type = PT_LOAD;
306 	/* XXX - is there a way to put the actual .text addr/size here? */
307 	hdr->kh_txtphdr.p_vaddr = 0;
308 	hdr->kh_txtphdr.p_memsz = 0;
309 	hdr->kh_txtphdr.p_flags = PF_R | PF_X;
310 
311 	hdr->kh_datphdr.p_type = PT_LOAD;
312 	/* XXX - is there a way to put the actual .data addr/size here? */
313 	hdr->kh_datphdr.p_vaddr = 0;
314 	hdr->kh_datphdr.p_memsz = 0;
315 	hdr->kh_datphdr.p_flags = PF_R | PF_W | PF_X;
316 
317 	/*
318 	 * Add the section headers: null, symtab, strtab, shstrtab.
319 	 */
320 
321 	/* First section header - null */
322 
323 	/* Second section header - symtab */
324 	hdr->kh_shdr[SHDR_SYMTAB].sh_name = 1; /* String offset (skip null) */
325 	hdr->kh_shdr[SHDR_SYMTAB].sh_type = SHT_SYMTAB;
326 	hdr->kh_shdr[SHDR_SYMTAB].sh_flags = 0;
327 	hdr->kh_shdr[SHDR_SYMTAB].sh_addr = 0;
328 	hdr->kh_shdr[SHDR_SYMTAB].sh_offset = sizeof(*hdr);
329 	hdr->kh_shdr[SHDR_SYMTAB].sh_size = ts->ts_symsz;
330 	hdr->kh_shdr[SHDR_SYMTAB].sh_link = SHDR_STRTAB;
331 	hdr->kh_shdr[SHDR_SYMTAB].sh_info = ts->ts_symsz / sizeof(Elf_Sym);
332 	hdr->kh_shdr[SHDR_SYMTAB].sh_addralign = sizeof(long);
333 	hdr->kh_shdr[SHDR_SYMTAB].sh_entsize = sizeof(Elf_Sym);
334 
335 	/* Third section header - strtab */
336 	hdr->kh_shdr[SHDR_STRTAB].sh_name = 1 + sizeof(STR_SYMTAB);
337 	hdr->kh_shdr[SHDR_STRTAB].sh_type = SHT_STRTAB;
338 	hdr->kh_shdr[SHDR_STRTAB].sh_flags = 0;
339 	hdr->kh_shdr[SHDR_STRTAB].sh_addr = 0;
340 	hdr->kh_shdr[SHDR_STRTAB].sh_offset =
341 	    hdr->kh_shdr[SHDR_SYMTAB].sh_offset + ts->ts_symsz;
342 	hdr->kh_shdr[SHDR_STRTAB].sh_size = ts->ts_strsz;
343 	hdr->kh_shdr[SHDR_STRTAB].sh_link = 0;
344 	hdr->kh_shdr[SHDR_STRTAB].sh_info = 0;
345 	hdr->kh_shdr[SHDR_STRTAB].sh_addralign = sizeof(char);
346 	hdr->kh_shdr[SHDR_STRTAB].sh_entsize = 0;
347 
348 	/* Fourth section - shstrtab */
349 	hdr->kh_shdr[SHDR_SHSTRTAB].sh_name = 1 + sizeof(STR_SYMTAB) +
350 	    sizeof(STR_STRTAB);
351 	hdr->kh_shdr[SHDR_SHSTRTAB].sh_type = SHT_STRTAB;
352 	hdr->kh_shdr[SHDR_SHSTRTAB].sh_flags = 0;
353 	hdr->kh_shdr[SHDR_SHSTRTAB].sh_addr = 0;
354 	hdr->kh_shdr[SHDR_SHSTRTAB].sh_offset =
355 	    offsetof(struct ksyms_hdr, kh_shstrtab);
356 	hdr->kh_shdr[SHDR_SHSTRTAB].sh_size = sizeof(ksyms_shstrtab);
357 	hdr->kh_shdr[SHDR_SHSTRTAB].sh_link = 0;
358 	hdr->kh_shdr[SHDR_SHSTRTAB].sh_info = 0;
359 	hdr->kh_shdr[SHDR_SHSTRTAB].sh_addralign = 0 /* sizeof(char) */;
360 	hdr->kh_shdr[SHDR_SHSTRTAB].sh_entsize = 0;
361 
362 	/* Copy shstrtab into the header. */
363 	bcopy(ksyms_shstrtab, hdr->kh_shstrtab, sizeof(ksyms_shstrtab));
364 
365 	to.to_sc = sc;
366 	to.to_symoff = hdr->kh_shdr[SHDR_SYMTAB].sh_offset;
367 	to.to_stroff = hdr->kh_shdr[SHDR_STRTAB].sh_offset;
368 	to.to_stridx = 0;
369 	to.to_resid = sc->sc_objsz - sizeof(struct ksyms_hdr);
370 
371 	/* emit header */
372 	error = ksyms_emit(sc, hdr, 0, sizeof(*hdr));
373 	free(hdr, M_KSYMS);
374 	if (error != 0)
375 		return (error);
376 
377 	/* Add symbol and string tables for each kernel module. */
378 	error = linker_file_foreach(ksyms_add, &to);
379 	if (error != 0)
380 		return (error);
381 	if (to.to_resid != 0)
382 		return (ENXIO);
383 	return (0);
384 }
385 
386 static void
387 ksyms_cdevpriv_dtr(void *data)
388 {
389 	struct ksyms_softc *sc;
390 	vm_object_t obj;
391 
392 	sc = (struct ksyms_softc *)data;
393 
394 	sx_xlock(&ksyms_mtx);
395 	LIST_REMOVE(sc, sc_list);
396 	sx_xunlock(&ksyms_mtx);
397 	obj = sc->sc_obj;
398 	if (obj != NULL)
399 		vm_object_deallocate(obj);
400 	free(sc, M_KSYMS);
401 }
402 
403 static int
404 ksyms_open(struct cdev *dev, int flags, int fmt __unused, struct thread *td)
405 {
406 	struct tsizes ts;
407 	struct ksyms_softc *sc;
408 	vm_object_t object;
409 	vm_size_t elfsz;
410 	int error, try;
411 
412 	/*
413 	 * Limit one open() per process. The process must close()
414 	 * before open()'ing again.
415 	 */
416 	sx_xlock(&ksyms_mtx);
417 	LIST_FOREACH(sc, &ksyms_list, sc_list) {
418 		if (sc->sc_proc == td->td_proc) {
419 			sx_xunlock(&ksyms_mtx);
420 			return (EBUSY);
421 		}
422 	}
423 
424 	sc = malloc(sizeof(*sc), M_KSYMS, M_WAITOK | M_ZERO);
425 	sc->sc_proc = td->td_proc;
426 	LIST_INSERT_HEAD(&ksyms_list, sc, sc_list);
427 	sx_xunlock(&ksyms_mtx);
428 
429 	error = devfs_set_cdevpriv(sc, ksyms_cdevpriv_dtr);
430 	if (error != 0) {
431 		ksyms_cdevpriv_dtr(sc);
432 		return (error);
433 	}
434 
435 	/*
436 	 * MOD_SLOCK doesn't work here (because of a lock reversal with
437 	 * KLD_SLOCK).  Therefore, simply try up to 3 times to get a "clean"
438 	 * snapshot of the kernel symbol table.  This should work fine in the
439 	 * rare case of a kernel module being loaded/unloaded at the same
440 	 * time.
441 	 */
442 	for (try = 0; try < 3; try++) {
443 		ksyms_size_calc(&ts);
444 		elfsz = sizeof(struct ksyms_hdr) + ts.ts_symsz + ts.ts_strsz;
445 
446 		object = vm_pager_allocate(OBJT_PHYS, NULL, round_page(elfsz),
447 		    VM_PROT_ALL, 0, td->td_ucred);
448 		sc->sc_obj = object;
449 		sc->sc_objsz = elfsz;
450 
451 		error = ksyms_snapshot(sc, &ts);
452 		if (error == 0)
453 			break;
454 
455 		vm_object_deallocate(sc->sc_obj);
456 		sc->sc_obj = NULL;
457 	}
458 	return (error);
459 }
460 
461 static int
462 ksyms_read(struct cdev *dev, struct uio *uio, int flags __unused)
463 {
464 	struct ksyms_softc *sc;
465 	int error;
466 
467 	error = devfs_get_cdevpriv((void **)&sc);
468 	if (error != 0)
469 		return (error);
470 	return (uiomove_object(sc->sc_obj, sc->sc_objsz, uio));
471 }
472 
473 static int
474 ksyms_mmap_single(struct cdev *dev, vm_ooffset_t *offset, vm_size_t size,
475     vm_object_t *objp, int nprot)
476 {
477 	struct ksyms_softc *sc;
478 	vm_object_t obj;
479 	int error;
480 
481 	error = devfs_get_cdevpriv((void **)&sc);
482 	if (error != 0)
483 		return (error);
484 
485 	if (*offset >= round_page(sc->sc_objsz) ||
486 	    size > round_page(sc->sc_objsz) - *offset ||
487 	    (nprot & ~PROT_READ) != 0)
488 		return (EINVAL);
489 
490 	obj = sc->sc_obj;
491 	vm_object_reference(obj);
492 	*objp = obj;
493 	return (0);
494 }
495 
496 static int
497 ksyms_modevent(module_t mod __unused, int type, void *data __unused)
498 {
499 	int error;
500 
501 	error = 0;
502 	switch (type) {
503 	case MOD_LOAD:
504 		sx_init(&ksyms_mtx, "KSyms mtx");
505 		ksyms_dev = make_dev(&ksyms_cdevsw, 0, UID_ROOT, GID_WHEEL,
506 		    0400, KSYMS_DNAME);
507 		break;
508 	case MOD_UNLOAD:
509 		if (!LIST_EMPTY(&ksyms_list))
510 			return (EBUSY);
511 		destroy_dev(ksyms_dev);
512 		sx_destroy(&ksyms_mtx);
513 		break;
514 	case MOD_SHUTDOWN:
515 		break;
516 	default:
517 		error = EOPNOTSUPP;
518 		break;
519 	}
520 	return (error);
521 }
522 
/* Register the driver as module "ksyms", version 1, with ksyms_modevent(). */
DEV_MODULE(ksyms, ksyms_modevent, NULL);
MODULE_VERSION(ksyms, 1);
525