xref: /freebsd/sys/dev/ksyms/ksyms.c (revision ccb59683b98360afaf5b5bb641a68fea22c68d0b)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2008-2009, Stacey Son <sson@freebsd.org>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/kernel.h>
34 
35 #include <sys/conf.h>
36 #include <sys/elf.h>
37 #include <sys/linker.h>
38 #include <sys/malloc.h>
39 #include <sys/mman.h>
40 #include <sys/module.h>
41 #include <sys/proc.h>
42 #include <sys/queue.h>
43 #include <sys/resourcevar.h>
44 #include <sys/rwlock.h>
45 #include <sys/stat.h>
46 #include <sys/sx.h>
47 #include <sys/uio.h>
48 
49 #include <machine/elf.h>
50 
51 #include <vm/pmap.h>
52 #include <vm/vm.h>
53 #include <vm/vm_extern.h>
54 #include <vm/vm_object.h>
55 #include <vm/vm_page.h>
56 #include <vm/vm_pager.h>
57 
58 #include "linker_if.h"
59 
/*
 * Indices of the section headers in the generated ELF image.  They index
 * ksyms_hdr.kh_shdr[] and are also stored in the image as sh_link and
 * e_shstrndx values.
 */
#define SHDR_NULL	0
#define SHDR_SYMTAB	1
#define SHDR_STRTAB	2
#define SHDR_SHSTRTAB	3

/* Number of section headers in the image (dimension of kh_shdr[]). */
#define SHDR_NUM	4

/* Section names placed in the section header string table. */
#define STR_SYMTAB	".symtab"
#define STR_STRTAB	".strtab"
#define STR_SHSTRTAB	".shstrtab"

/* Name used both for the cdevsw entry and for the device node. */
#define KSYMS_DNAME	"ksyms"
72 
/* Character device entry points implemented later in this file. */
static d_open_t ksyms_open;
static d_read_t ksyms_read;
static d_mmap_single_t ksyms_mmap_single;

/*
 * Device switch for the ksyms device.  Only open, read and mmap_single
 * handlers are supplied.
 */
static struct cdevsw ksyms_cdevsw = {
	.d_version =	D_VERSION,
	.d_flags =	0,
	.d_open =	ksyms_open,
	.d_read =	ksyms_read,
	.d_mmap_single = ksyms_mmap_single,
	.d_name =	KSYMS_DNAME
};
85 
/*
 * Per-open state.  One instance is allocated by ksyms_open(), attached to
 * the open as cdevpriv data, and released by ksyms_cdevpriv_dtr().
 */
struct ksyms_softc {
	LIST_ENTRY(ksyms_softc)	sc_list;	/* linkage on ksyms_list */
	/* NOTE(review): sc_uaddr and sc_usize are not referenced in this file. */
	vm_offset_t		sc_uaddr;
	size_t			sc_usize;
	vm_object_t		sc_obj;		/* VM object holding the snapshot */
	vm_size_t		sc_objsz;	/* snapshot size in bytes (not page-rounded) */
	struct proc	       *sc_proc;	/* process that performed the open */
};
94 
/* Lock taken around accesses to ksyms_list in ksyms_open() and the destructor. */
static struct sx		 ksyms_mtx;
/* Device node created on MOD_LOAD and destroyed on MOD_UNLOAD. */
static struct cdev		*ksyms_dev;
/* All softcs that currently exist, one per successful or in-progress open. */
static LIST_HEAD(, ksyms_softc)	 ksyms_list = LIST_HEAD_INITIALIZER(ksyms_list);

/*
 * Contents of the section header string table: a leading NUL followed by
 * the three NUL-terminated section names.
 */
static const char	ksyms_shstrtab[] =
	"\0" STR_SYMTAB "\0" STR_STRTAB "\0" STR_SHSTRTAB "\0";
101 
/*
 * Fixed-size header written at offset 0 of the snapshot.  The merged symbol
 * table starts immediately after it, at offset sizeof(struct ksyms_hdr).
 */
struct ksyms_hdr {
	Elf_Ehdr	kh_ehdr;		/* ELF file header */
	Elf_Phdr	kh_txtphdr;		/* "text" program header */
	Elf_Phdr	kh_datphdr;		/* "data" program header */
	Elf_Shdr	kh_shdr[SHDR_NUM];	/* null, symtab, strtab, shstrtab */
	char		kh_shstrtab[sizeof(ksyms_shstrtab)]; /* copy of ksyms_shstrtab */
};
109 
/* Running totals accumulated over all linker files by ksyms_size_permod(). */
struct tsizes {
	size_t		ts_symsz;	/* total symbol table size in bytes */
	size_t		ts_strsz;	/* total string table size in bytes */
};
114 
/* Write cursor shared by successive ksyms_add() calls while filling a snapshot. */
struct toffsets {
	struct ksyms_softc *to_sc;	/* open whose snapshot object is being filled */
	vm_offset_t	to_symoff;	/* object offset for the next symbol block */
	vm_offset_t	to_stroff;	/* object offset for the next string table */
	unsigned	to_stridx;	/* bytes of string table emitted so far; added to st_name */
	size_t		to_resid;	/* bytes of table space still unwritten */
};
122 
/* malloc(9) type used for every allocation made by this driver. */
static MALLOC_DEFINE(M_KSYMS, "KSYMS", "Kernel Symbol Table");
124 
125 /*
126  * Get the symbol and string table sizes for a kernel module. Add it to the
127  * running total.
128  */
129 static int
130 ksyms_size_permod(linker_file_t lf, void *arg)
131 {
132 	struct tsizes *ts;
133 	const Elf_Sym *symtab;
134 	caddr_t strtab;
135 	long syms;
136 
137 	ts = arg;
138 
139 	syms = LINKER_SYMTAB_GET(lf, &symtab);
140 	ts->ts_symsz += syms * sizeof(Elf_Sym);
141 	ts->ts_strsz += LINKER_STRTAB_GET(lf, &strtab);
142 
143 	return (0);
144 }
145 
146 /*
147  * For kernel module get the symbol and string table sizes, returning the
148  * totals in *ts.
149  */
150 static void
151 ksyms_size_calc(struct tsizes *ts)
152 {
153 
154 	ts->ts_symsz = 0;
155 	ts->ts_strsz = 0;
156 
157 	(void)linker_file_foreach(ksyms_size_permod, ts);
158 }
159 
160 static int
161 ksyms_emit(struct ksyms_softc *sc, void *buf, off_t off, size_t sz)
162 {
163 	struct iovec iov;
164 	struct uio uio;
165 
166 	iov.iov_base = buf;
167 	iov.iov_len = sz;
168 	uio.uio_iov = &iov;
169 	uio.uio_iovcnt = 1;
170 	uio.uio_offset = off;
171 	uio.uio_resid = (ssize_t)sz;
172 	uio.uio_segflg = UIO_SYSSPACE;
173 	uio.uio_rw = UIO_WRITE;
174 	uio.uio_td = curthread;
175 
176 	return (uiomove_object(sc->sc_obj, sc->sc_objsz, &uio));
177 }
178 
179 #define SYMBLKSZ	(256 * sizeof(Elf_Sym))
180 
181 /*
182  * For a kernel module, add the symbol and string tables into the
183  * snapshot buffer.  Fix up the offsets in the tables.
184  */
185 static int
186 ksyms_add(linker_file_t lf, void *arg)
187 {
188 	char *buf;
189 	struct ksyms_softc *sc;
190 	struct toffsets *to;
191 	const Elf_Sym *symtab;
192 	Elf_Sym *symp;
193 	caddr_t strtab;
194 	size_t len, numsyms, strsz, symsz;
195 	linker_symval_t symval;
196 	int error, i, nsyms;
197 	bool fixup;
198 
199 	buf = malloc(SYMBLKSZ, M_KSYMS, M_WAITOK);
200 	to = arg;
201 	sc = to->to_sc;
202 
203 	MOD_SLOCK;
204 	numsyms =  LINKER_SYMTAB_GET(lf, &symtab);
205 	strsz = LINKER_STRTAB_GET(lf, &strtab);
206 	symsz = numsyms * sizeof(Elf_Sym);
207 
208 #ifdef RELOCATABLE_KERNEL
209 	fixup = true;
210 #else
211 	fixup = lf->id > 1;
212 #endif
213 
214 	while (symsz > 0) {
215 		len = min(SYMBLKSZ, symsz);
216 		bcopy(symtab, buf, len);
217 
218 		/*
219 		 * Fix up symbol table for kernel modules:
220 		 *   string offsets need adjusted
221 		 *   symbol values made absolute
222 		 */
223 		symp = (Elf_Sym *) buf;
224 		nsyms = len / sizeof(Elf_Sym);
225 		for (i = 0; i < nsyms; i++) {
226 			symp[i].st_name += to->to_stridx;
227 			if (fixup && LINKER_SYMBOL_VALUES(lf,
228 			    (c_linker_sym_t)&symtab[i], &symval) == 0) {
229 				symp[i].st_value = (uintptr_t)symval.value;
230 			}
231 		}
232 
233 		if (len > to->to_resid) {
234 			MOD_SUNLOCK;
235 			free(buf, M_KSYMS);
236 			return (ENXIO);
237 		}
238 		to->to_resid -= len;
239 		error = ksyms_emit(sc, buf, to->to_symoff, len);
240 		to->to_symoff += len;
241 		if (error != 0) {
242 			MOD_SUNLOCK;
243 			free(buf, M_KSYMS);
244 			return (error);
245 		}
246 
247 		symtab += nsyms;
248 		symsz -= len;
249 	}
250 	free(buf, M_KSYMS);
251 	MOD_SUNLOCK;
252 
253 	if (strsz > to->to_resid)
254 		return (ENXIO);
255 	to->to_resid -= strsz;
256 	error = ksyms_emit(sc, strtab, to->to_stroff, strsz);
257 	to->to_stroff += strsz;
258 	to->to_stridx += strsz;
259 
260 	return (error);
261 }
262 
263 /*
264  * Create a single ELF symbol table for the kernel and kernel modules loaded
265  * at this time. Write this snapshot out in the process address space. Return
266  * 0 on success, otherwise error.
267  */
268 static int
269 ksyms_snapshot(struct ksyms_softc *sc, struct tsizes *ts)
270 {
271 	struct toffsets	to;
272 	struct ksyms_hdr *hdr;
273 	int error;
274 
275 	hdr = malloc(sizeof(*hdr), M_KSYMS, M_WAITOK | M_ZERO);
276 
277 	/*
278 	 * Create the ELF header.
279 	 */
280 	hdr->kh_ehdr.e_ident[EI_PAD] = 0;
281 	hdr->kh_ehdr.e_ident[EI_MAG0] = ELFMAG0;
282 	hdr->kh_ehdr.e_ident[EI_MAG1] = ELFMAG1;
283 	hdr->kh_ehdr.e_ident[EI_MAG2] = ELFMAG2;
284 	hdr->kh_ehdr.e_ident[EI_MAG3] = ELFMAG3;
285 	hdr->kh_ehdr.e_ident[EI_DATA] = ELF_DATA;
286 	hdr->kh_ehdr.e_ident[EI_OSABI] = ELFOSABI_FREEBSD;
287 	hdr->kh_ehdr.e_ident[EI_CLASS] = ELF_CLASS;
288 	hdr->kh_ehdr.e_ident[EI_VERSION] = EV_CURRENT;
289 	hdr->kh_ehdr.e_ident[EI_ABIVERSION] = 0;
290 	hdr->kh_ehdr.e_type = ET_EXEC;
291 	hdr->kh_ehdr.e_machine = ELF_ARCH;
292 	hdr->kh_ehdr.e_version = EV_CURRENT;
293 	hdr->kh_ehdr.e_entry = 0;
294 	hdr->kh_ehdr.e_phoff = offsetof(struct ksyms_hdr, kh_txtphdr);
295 	hdr->kh_ehdr.e_shoff = offsetof(struct ksyms_hdr, kh_shdr);
296 	hdr->kh_ehdr.e_flags = 0;
297 	hdr->kh_ehdr.e_ehsize = sizeof(Elf_Ehdr);
298 	hdr->kh_ehdr.e_phentsize = sizeof(Elf_Phdr);
299 	hdr->kh_ehdr.e_phnum = 2;	/* Text and Data */
300 	hdr->kh_ehdr.e_shentsize = sizeof(Elf_Shdr);
301 	hdr->kh_ehdr.e_shnum = SHDR_NUM;
302 	hdr->kh_ehdr.e_shstrndx = SHDR_SHSTRTAB;
303 
304 	/*
305 	 * Add both the text and data program headers.
306 	 */
307 	hdr->kh_txtphdr.p_type = PT_LOAD;
308 	/* XXX - is there a way to put the actual .text addr/size here? */
309 	hdr->kh_txtphdr.p_vaddr = 0;
310 	hdr->kh_txtphdr.p_memsz = 0;
311 	hdr->kh_txtphdr.p_flags = PF_R | PF_X;
312 
313 	hdr->kh_datphdr.p_type = PT_LOAD;
314 	/* XXX - is there a way to put the actual .data addr/size here? */
315 	hdr->kh_datphdr.p_vaddr = 0;
316 	hdr->kh_datphdr.p_memsz = 0;
317 	hdr->kh_datphdr.p_flags = PF_R | PF_W | PF_X;
318 
319 	/*
320 	 * Add the section headers: null, symtab, strtab, shstrtab.
321 	 */
322 
323 	/* First section header - null */
324 
325 	/* Second section header - symtab */
326 	hdr->kh_shdr[SHDR_SYMTAB].sh_name = 1; /* String offset (skip null) */
327 	hdr->kh_shdr[SHDR_SYMTAB].sh_type = SHT_SYMTAB;
328 	hdr->kh_shdr[SHDR_SYMTAB].sh_flags = 0;
329 	hdr->kh_shdr[SHDR_SYMTAB].sh_addr = 0;
330 	hdr->kh_shdr[SHDR_SYMTAB].sh_offset = sizeof(*hdr);
331 	hdr->kh_shdr[SHDR_SYMTAB].sh_size = ts->ts_symsz;
332 	hdr->kh_shdr[SHDR_SYMTAB].sh_link = SHDR_STRTAB;
333 	hdr->kh_shdr[SHDR_SYMTAB].sh_info = ts->ts_symsz / sizeof(Elf_Sym);
334 	hdr->kh_shdr[SHDR_SYMTAB].sh_addralign = sizeof(long);
335 	hdr->kh_shdr[SHDR_SYMTAB].sh_entsize = sizeof(Elf_Sym);
336 
337 	/* Third section header - strtab */
338 	hdr->kh_shdr[SHDR_STRTAB].sh_name = 1 + sizeof(STR_SYMTAB);
339 	hdr->kh_shdr[SHDR_STRTAB].sh_type = SHT_STRTAB;
340 	hdr->kh_shdr[SHDR_STRTAB].sh_flags = 0;
341 	hdr->kh_shdr[SHDR_STRTAB].sh_addr = 0;
342 	hdr->kh_shdr[SHDR_STRTAB].sh_offset =
343 	    hdr->kh_shdr[SHDR_SYMTAB].sh_offset + ts->ts_symsz;
344 	hdr->kh_shdr[SHDR_STRTAB].sh_size = ts->ts_strsz;
345 	hdr->kh_shdr[SHDR_STRTAB].sh_link = 0;
346 	hdr->kh_shdr[SHDR_STRTAB].sh_info = 0;
347 	hdr->kh_shdr[SHDR_STRTAB].sh_addralign = sizeof(char);
348 	hdr->kh_shdr[SHDR_STRTAB].sh_entsize = 0;
349 
350 	/* Fourth section - shstrtab */
351 	hdr->kh_shdr[SHDR_SHSTRTAB].sh_name = 1 + sizeof(STR_SYMTAB) +
352 	    sizeof(STR_STRTAB);
353 	hdr->kh_shdr[SHDR_SHSTRTAB].sh_type = SHT_STRTAB;
354 	hdr->kh_shdr[SHDR_SHSTRTAB].sh_flags = 0;
355 	hdr->kh_shdr[SHDR_SHSTRTAB].sh_addr = 0;
356 	hdr->kh_shdr[SHDR_SHSTRTAB].sh_offset =
357 	    offsetof(struct ksyms_hdr, kh_shstrtab);
358 	hdr->kh_shdr[SHDR_SHSTRTAB].sh_size = sizeof(ksyms_shstrtab);
359 	hdr->kh_shdr[SHDR_SHSTRTAB].sh_link = 0;
360 	hdr->kh_shdr[SHDR_SHSTRTAB].sh_info = 0;
361 	hdr->kh_shdr[SHDR_SHSTRTAB].sh_addralign = 0 /* sizeof(char) */;
362 	hdr->kh_shdr[SHDR_SHSTRTAB].sh_entsize = 0;
363 
364 	/* Copy shstrtab into the header. */
365 	bcopy(ksyms_shstrtab, hdr->kh_shstrtab, sizeof(ksyms_shstrtab));
366 
367 	to.to_sc = sc;
368 	to.to_symoff = hdr->kh_shdr[SHDR_SYMTAB].sh_offset;
369 	to.to_stroff = hdr->kh_shdr[SHDR_STRTAB].sh_offset;
370 	to.to_stridx = 0;
371 	to.to_resid = sc->sc_objsz - sizeof(struct ksyms_hdr);
372 
373 	/* emit header */
374 	error = ksyms_emit(sc, hdr, 0, sizeof(*hdr));
375 	free(hdr, M_KSYMS);
376 	if (error != 0)
377 		return (error);
378 
379 	/* Add symbol and string tables for each kernel module. */
380 	error = linker_file_foreach(ksyms_add, &to);
381 	if (error != 0)
382 		return (error);
383 	if (to.to_resid != 0)
384 		return (ENXIO);
385 	return (0);
386 }
387 
388 static void
389 ksyms_cdevpriv_dtr(void *data)
390 {
391 	struct ksyms_softc *sc;
392 	vm_object_t obj;
393 
394 	sc = (struct ksyms_softc *)data;
395 
396 	sx_xlock(&ksyms_mtx);
397 	LIST_REMOVE(sc, sc_list);
398 	sx_xunlock(&ksyms_mtx);
399 	obj = sc->sc_obj;
400 	if (obj != NULL)
401 		vm_object_deallocate(obj);
402 	free(sc, M_KSYMS);
403 }
404 
405 static int
406 ksyms_open(struct cdev *dev, int flags, int fmt __unused, struct thread *td)
407 {
408 	struct tsizes ts;
409 	struct ksyms_softc *sc;
410 	vm_object_t object;
411 	vm_size_t elfsz;
412 	int error, try;
413 
414 	/*
415 	 * Limit one open() per process. The process must close()
416 	 * before open()'ing again.
417 	 */
418 	sx_xlock(&ksyms_mtx);
419 	LIST_FOREACH(sc, &ksyms_list, sc_list) {
420 		if (sc->sc_proc == td->td_proc) {
421 			sx_xunlock(&ksyms_mtx);
422 			return (EBUSY);
423 		}
424 	}
425 
426 	sc = malloc(sizeof(*sc), M_KSYMS, M_WAITOK | M_ZERO);
427 	sc->sc_proc = td->td_proc;
428 	LIST_INSERT_HEAD(&ksyms_list, sc, sc_list);
429 	sx_xunlock(&ksyms_mtx);
430 
431 	error = devfs_set_cdevpriv(sc, ksyms_cdevpriv_dtr);
432 	if (error != 0) {
433 		ksyms_cdevpriv_dtr(sc);
434 		return (error);
435 	}
436 
437 	/*
438 	 * MOD_SLOCK doesn't work here (because of a lock reversal with
439 	 * KLD_SLOCK).  Therefore, simply try up to 3 times to get a "clean"
440 	 * snapshot of the kernel symbol table.  This should work fine in the
441 	 * rare case of a kernel module being loaded/unloaded at the same
442 	 * time.
443 	 */
444 	for (try = 0; try < 3; try++) {
445 		ksyms_size_calc(&ts);
446 		elfsz = sizeof(struct ksyms_hdr) + ts.ts_symsz + ts.ts_strsz;
447 
448 		object = vm_pager_allocate(OBJT_PHYS, NULL, round_page(elfsz),
449 		    VM_PROT_ALL, 0, td->td_ucred);
450 		sc->sc_obj = object;
451 		sc->sc_objsz = elfsz;
452 
453 		error = ksyms_snapshot(sc, &ts);
454 		if (error == 0)
455 			break;
456 
457 		vm_object_deallocate(sc->sc_obj);
458 		sc->sc_obj = NULL;
459 	}
460 	return (error);
461 }
462 
463 static int
464 ksyms_read(struct cdev *dev, struct uio *uio, int flags __unused)
465 {
466 	struct ksyms_softc *sc;
467 	int error;
468 
469 	error = devfs_get_cdevpriv((void **)&sc);
470 	if (error != 0)
471 		return (error);
472 	return (uiomove_object(sc->sc_obj, sc->sc_objsz, uio));
473 }
474 
475 static int
476 ksyms_mmap_single(struct cdev *dev, vm_ooffset_t *offset, vm_size_t size,
477     vm_object_t *objp, int nprot)
478 {
479 	struct ksyms_softc *sc;
480 	vm_object_t obj;
481 	int error;
482 
483 	error = devfs_get_cdevpriv((void **)&sc);
484 	if (error != 0)
485 		return (error);
486 
487 	if (*offset >= round_page(sc->sc_objsz) ||
488 	    size > round_page(sc->sc_objsz) - *offset ||
489 	    (nprot & ~PROT_READ) != 0)
490 		return (EINVAL);
491 
492 	obj = sc->sc_obj;
493 	vm_object_reference(obj);
494 	*objp = obj;
495 	return (0);
496 }
497 
498 static int
499 ksyms_modevent(module_t mod __unused, int type, void *data __unused)
500 {
501 	int error;
502 
503 	error = 0;
504 	switch (type) {
505 	case MOD_LOAD:
506 		sx_init(&ksyms_mtx, "KSyms mtx");
507 		ksyms_dev = make_dev(&ksyms_cdevsw, 0, UID_ROOT, GID_WHEEL,
508 		    0400, KSYMS_DNAME);
509 		break;
510 	case MOD_UNLOAD:
511 		if (!LIST_EMPTY(&ksyms_list))
512 			return (EBUSY);
513 		destroy_dev(ksyms_dev);
514 		sx_destroy(&ksyms_mtx);
515 		break;
516 	case MOD_SHUTDOWN:
517 		break;
518 	default:
519 		error = EOPNOTSUPP;
520 		break;
521 	}
522 	return (error);
523 }
524 
/* Register the "ksyms" module, with ksyms_modevent() as its event handler. */
DEV_MODULE(ksyms, ksyms_modevent, NULL);
MODULE_VERSION(ksyms, 1);
527