1 /*- 2 * Copyright (c) 2008-2009, Stacey Son <sson@freebsd.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * $FreeBSD$ 27 */ 28 29 #include <sys/param.h> 30 #include <sys/systm.h> 31 #include <sys/kernel.h> 32 33 #include <sys/conf.h> 34 #include <sys/elf.h> 35 #include <sys/ksyms.h> 36 #include <sys/linker.h> 37 #include <sys/malloc.h> 38 #include <sys/mman.h> 39 #include <sys/module.h> 40 #include <sys/mutex.h> 41 #include <sys/proc.h> 42 #include <sys/queue.h> 43 #include <sys/resourcevar.h> 44 #include <sys/stat.h> 45 #include <sys/uio.h> 46 47 #include <machine/elf.h> 48 49 #include <vm/pmap.h> 50 #include <vm/vm.h> 51 #include <vm/vm_extern.h> 52 #include <vm/vm_map.h> 53 54 #include "linker_if.h" 55 56 #define SHDR_NULL 0 57 #define SHDR_SYMTAB 1 58 #define SHDR_STRTAB 2 59 #define SHDR_SHSTRTAB 3 60 61 #define SHDR_NUM 4 62 63 #define STR_SYMTAB ".symtab" 64 #define STR_STRTAB ".strtab" 65 #define STR_SHSTRTAB ".shstrtab" 66 67 #define KSYMS_DNAME "ksyms" 68 69 static d_open_t ksyms_open; 70 static d_read_t ksyms_read; 71 static d_close_t ksyms_close; 72 static d_ioctl_t ksyms_ioctl; 73 static d_mmap_t ksyms_mmap; 74 75 static struct cdevsw ksyms_cdevsw = { 76 .d_version = D_VERSION, 77 .d_flags = D_TRACKCLOSE, 78 .d_open = ksyms_open, 79 .d_close = ksyms_close, 80 .d_read = ksyms_read, 81 .d_ioctl = ksyms_ioctl, 82 .d_mmap = ksyms_mmap, 83 .d_name = KSYMS_DNAME 84 }; 85 86 struct ksyms_softc { 87 LIST_ENTRY(ksyms_softc) sc_list; 88 vm_offset_t sc_uaddr; 89 size_t sc_usize; 90 pmap_t sc_pmap; 91 struct proc *sc_proc; 92 }; 93 94 static struct mtx ksyms_mtx; 95 static struct cdev *ksyms_dev; 96 static LIST_HEAD(, ksyms_softc) ksyms_list = LIST_HEAD_INITIALIZER(ksyms_list); 97 98 static const char ksyms_shstrtab[] = 99 "\0" STR_SYMTAB "\0" STR_STRTAB "\0" STR_SHSTRTAB "\0"; 100 101 struct ksyms_hdr { 102 Elf_Ehdr kh_ehdr; 103 Elf_Phdr kh_txtphdr; 104 Elf_Phdr kh_datphdr; 105 Elf_Shdr kh_shdr[SHDR_NUM]; 106 char kh_shstrtab[sizeof(ksyms_shstrtab)]; 107 }; 108 109 struct tsizes { 110 size_t ts_symsz; 111 size_t ts_strsz; 112 }; 113 114 struct toffsets { 115 vm_offset_t to_symoff; 116 vm_offset_t to_stroff; 117 unsigned to_stridx; 118 size_t to_resid; 119 }; 120 121 static MALLOC_DEFINE(M_KSYMS, "KSYMS", "Kernel Symbol Table"); 122 123 /* 124 * Get the symbol and string table sizes for a kernel module. Add it to the 125 * running total. 126 */ 127 static int 128 ksyms_size_permod(linker_file_t lf, void *arg) 129 { 130 struct tsizes *ts; 131 const Elf_Sym *symtab; 132 caddr_t strtab; 133 long syms; 134 135 ts = arg; 136 137 syms = LINKER_SYMTAB_GET(lf, &symtab); 138 ts->ts_symsz += syms * sizeof(Elf_Sym); 139 ts->ts_strsz += LINKER_STRTAB_GET(lf, &strtab); 140 141 return (0); 142 } 143 144 /* 145 * For kernel module get the symbol and string table sizes, returning the 146 * totals in *ts. 147 */ 148 static void 149 ksyms_size_calc(struct tsizes *ts) 150 { 151 152 ts->ts_symsz = 0; 153 ts->ts_strsz = 0; 154 155 (void)linker_file_foreach(ksyms_size_permod, ts); 156 } 157 158 #define KSYMS_EMIT(src, des, sz) do { \ 159 copyout(src, (void *)des, sz); \ 160 des += sz; \ 161 } while (0) 162 163 #define SYMBLKSZ (256 * sizeof(Elf_Sym)) 164 165 /* 166 * For a kernel module, add the symbol and string tables into the 167 * snapshot buffer. Fix up the offsets in the tables. 168 */ 169 static int 170 ksyms_add(linker_file_t lf, void *arg) 171 { 172 char *buf; 173 struct toffsets *to; 174 const Elf_Sym *symtab; 175 Elf_Sym *symp; 176 caddr_t strtab; 177 long symsz; 178 size_t strsz, numsyms; 179 linker_symval_t symval; 180 int i, nsyms, len; 181 182 to = arg; 183 184 MOD_SLOCK; 185 numsyms = LINKER_SYMTAB_GET(lf, &symtab); 186 strsz = LINKER_STRTAB_GET(lf, &strtab); 187 symsz = numsyms * sizeof(Elf_Sym); 188 189 buf = malloc(SYMBLKSZ, M_KSYMS, M_WAITOK); 190 191 while (symsz > 0) { 192 len = min(SYMBLKSZ, symsz); 193 bcopy(symtab, buf, len); 194 195 /* 196 * Fix up symbol table for kernel modules: 197 * string offsets need adjusted 198 * symbol values made absolute 199 */ 200 symp = (Elf_Sym *) buf; 201 nsyms = len / sizeof(Elf_Sym); 202 for (i = 0; i < nsyms; i++) { 203 symp[i].st_name += to->to_stridx; 204 if (lf->id > 1 && LINKER_SYMBOL_VALUES(lf, 205 (c_linker_sym_t)&symtab[i], &symval) == 0) { 206 symp[i].st_value = (uintptr_t)symval.value; 207 } 208 } 209 210 if (len > to->to_resid) { 211 MOD_SUNLOCK; 212 free(buf, M_KSYMS); 213 return (ENXIO); 214 } 215 to->to_resid -= len; 216 KSYMS_EMIT(buf, to->to_symoff, len); 217 218 symtab += nsyms; 219 symsz -= len; 220 } 221 free(buf, M_KSYMS); 222 MOD_SUNLOCK; 223 224 if (strsz > to->to_resid) 225 return (ENXIO); 226 to->to_resid -= strsz; 227 KSYMS_EMIT(strtab, to->to_stroff, strsz); 228 to->to_stridx += strsz; 229 230 return (0); 231 } 232 233 /* 234 * Create a single ELF symbol table for the kernel and kernel modules loaded 235 * at this time. Write this snapshot out in the process address space. Return 236 * 0 on success, otherwise error. 237 */ 238 static int 239 ksyms_snapshot(struct tsizes *ts, vm_offset_t uaddr, size_t resid) 240 { 241 struct ksyms_hdr *hdr; 242 struct toffsets to; 243 int error = 0; 244 245 hdr = malloc(sizeof(*hdr), M_KSYMS, M_WAITOK | M_ZERO); 246 247 /* 248 * Create the ELF header. 249 */ 250 hdr->kh_ehdr.e_ident[EI_PAD] = 0; 251 hdr->kh_ehdr.e_ident[EI_MAG0] = ELFMAG0; 252 hdr->kh_ehdr.e_ident[EI_MAG1] = ELFMAG1; 253 hdr->kh_ehdr.e_ident[EI_MAG2] = ELFMAG2; 254 hdr->kh_ehdr.e_ident[EI_MAG3] = ELFMAG3; 255 hdr->kh_ehdr.e_ident[EI_DATA] = ELF_DATA; 256 hdr->kh_ehdr.e_ident[EI_OSABI] = ELFOSABI_FREEBSD; 257 hdr->kh_ehdr.e_ident[EI_CLASS] = ELF_CLASS; 258 hdr->kh_ehdr.e_ident[EI_VERSION] = EV_CURRENT; 259 hdr->kh_ehdr.e_ident[EI_ABIVERSION] = 0; 260 hdr->kh_ehdr.e_type = ET_EXEC; 261 hdr->kh_ehdr.e_machine = ELF_ARCH; 262 hdr->kh_ehdr.e_version = EV_CURRENT; 263 hdr->kh_ehdr.e_entry = 0; 264 hdr->kh_ehdr.e_phoff = offsetof(struct ksyms_hdr, kh_txtphdr); 265 hdr->kh_ehdr.e_shoff = offsetof(struct ksyms_hdr, kh_shdr); 266 hdr->kh_ehdr.e_flags = 0; 267 hdr->kh_ehdr.e_ehsize = sizeof(Elf_Ehdr); 268 hdr->kh_ehdr.e_phentsize = sizeof(Elf_Phdr); 269 hdr->kh_ehdr.e_phnum = 2; /* Text and Data */ 270 hdr->kh_ehdr.e_shentsize = sizeof(Elf_Shdr); 271 hdr->kh_ehdr.e_shnum = SHDR_NUM; 272 hdr->kh_ehdr.e_shstrndx = SHDR_SHSTRTAB; 273 274 /* 275 * Add both the text and data program headers. 276 */ 277 hdr->kh_txtphdr.p_type = PT_LOAD; 278 /* XXX - is there a way to put the actual .text addr/size here? */ 279 hdr->kh_txtphdr.p_vaddr = 0; 280 hdr->kh_txtphdr.p_memsz = 0; 281 hdr->kh_txtphdr.p_flags = PF_R | PF_X; 282 283 hdr->kh_datphdr.p_type = PT_LOAD; 284 /* XXX - is there a way to put the actual .data addr/size here? */ 285 hdr->kh_datphdr.p_vaddr = 0; 286 hdr->kh_datphdr.p_memsz = 0; 287 hdr->kh_datphdr.p_flags = PF_R | PF_W | PF_X; 288 289 /* 290 * Add the section headers: null, symtab, strtab, shstrtab. 291 */ 292 293 /* First section header - null */ 294 295 /* Second section header - symtab */ 296 hdr->kh_shdr[SHDR_SYMTAB].sh_name = 1; /* String offset (skip null) */ 297 hdr->kh_shdr[SHDR_SYMTAB].sh_type = SHT_SYMTAB; 298 hdr->kh_shdr[SHDR_SYMTAB].sh_flags = 0; 299 hdr->kh_shdr[SHDR_SYMTAB].sh_addr = 0; 300 hdr->kh_shdr[SHDR_SYMTAB].sh_offset = sizeof(*hdr); 301 hdr->kh_shdr[SHDR_SYMTAB].sh_size = ts->ts_symsz; 302 hdr->kh_shdr[SHDR_SYMTAB].sh_link = SHDR_STRTAB; 303 hdr->kh_shdr[SHDR_SYMTAB].sh_info = ts->ts_symsz / sizeof(Elf_Sym); 304 hdr->kh_shdr[SHDR_SYMTAB].sh_addralign = sizeof(long); 305 hdr->kh_shdr[SHDR_SYMTAB].sh_entsize = sizeof(Elf_Sym); 306 307 /* Third section header - strtab */ 308 hdr->kh_shdr[SHDR_STRTAB].sh_name = 1 + sizeof(STR_SYMTAB); 309 hdr->kh_shdr[SHDR_STRTAB].sh_type = SHT_STRTAB; 310 hdr->kh_shdr[SHDR_STRTAB].sh_flags = 0; 311 hdr->kh_shdr[SHDR_STRTAB].sh_addr = 0; 312 hdr->kh_shdr[SHDR_STRTAB].sh_offset = 313 hdr->kh_shdr[SHDR_SYMTAB].sh_offset + ts->ts_symsz; 314 hdr->kh_shdr[SHDR_STRTAB].sh_size = ts->ts_strsz; 315 hdr->kh_shdr[SHDR_STRTAB].sh_link = 0; 316 hdr->kh_shdr[SHDR_STRTAB].sh_info = 0; 317 hdr->kh_shdr[SHDR_STRTAB].sh_addralign = sizeof(char); 318 hdr->kh_shdr[SHDR_STRTAB].sh_entsize = 0; 319 320 /* Fourth section - shstrtab */ 321 hdr->kh_shdr[SHDR_SHSTRTAB].sh_name = 1 + sizeof(STR_SYMTAB) + 322 sizeof(STR_STRTAB); 323 hdr->kh_shdr[SHDR_SHSTRTAB].sh_type = SHT_STRTAB; 324 hdr->kh_shdr[SHDR_SHSTRTAB].sh_flags = 0; 325 hdr->kh_shdr[SHDR_SHSTRTAB].sh_addr = 0; 326 hdr->kh_shdr[SHDR_SHSTRTAB].sh_offset = 327 offsetof(struct ksyms_hdr, kh_shstrtab); 328 hdr->kh_shdr[SHDR_SHSTRTAB].sh_size = sizeof(ksyms_shstrtab); 329 hdr->kh_shdr[SHDR_SHSTRTAB].sh_link = 0; 330 hdr->kh_shdr[SHDR_SHSTRTAB].sh_info = 0; 331 hdr->kh_shdr[SHDR_SHSTRTAB].sh_addralign = 0 /* sizeof(char) */; 332 hdr->kh_shdr[SHDR_SHSTRTAB].sh_entsize = 0; 333 334 /* Copy shstrtab into the header. */ 335 bcopy(ksyms_shstrtab, hdr->kh_shstrtab, sizeof(ksyms_shstrtab)); 336 337 to.to_symoff = uaddr + hdr->kh_shdr[SHDR_SYMTAB].sh_offset; 338 to.to_stroff = uaddr + hdr->kh_shdr[SHDR_STRTAB].sh_offset; 339 to.to_stridx = 0; 340 if (sizeof(struct ksyms_hdr) > resid) { 341 free(hdr, M_KSYMS); 342 return (ENXIO); 343 } 344 to.to_resid = resid - sizeof(struct ksyms_hdr); 345 346 /* emit header */ 347 copyout(hdr, (void *)uaddr, sizeof(struct ksyms_hdr)); 348 349 free(hdr, M_KSYMS); 350 351 /* Add symbol and string tables for each kernel module. */ 352 error = linker_file_foreach(ksyms_add, &to); 353 354 if (to.to_resid != 0) 355 return (ENXIO); 356 357 return (error); 358 } 359 360 static void 361 ksyms_cdevpriv_dtr(void *data) 362 { 363 struct ksyms_softc *sc; 364 365 sc = (struct ksyms_softc *)data; 366 367 mtx_lock(&ksyms_mtx); 368 LIST_REMOVE(sc, sc_list); 369 mtx_unlock(&ksyms_mtx); 370 free(sc, M_KSYMS); 371 } 372 373 static int 374 ksyms_open(struct cdev *dev, int flags, int fmt __unused, struct thread *td) 375 { 376 struct tsizes ts; 377 struct ksyms_softc *sc; 378 size_t total_elf_sz; 379 int error, try; 380 381 /* 382 * Limit one open() per process. The process must close() 383 * before open()'ing again. 384 */ 385 mtx_lock(&ksyms_mtx); 386 LIST_FOREACH(sc, &ksyms_list, sc_list) { 387 if (sc->sc_proc == td->td_proc) { 388 mtx_unlock(&ksyms_mtx); 389 return (EBUSY); 390 } 391 } 392 393 sc = malloc(sizeof(*sc), M_KSYMS, M_NOWAIT | M_ZERO); 394 if (sc == NULL) { 395 mtx_unlock(&ksyms_mtx); 396 return (ENOMEM); 397 } 398 sc->sc_proc = td->td_proc; 399 sc->sc_pmap = &td->td_proc->p_vmspace->vm_pmap; 400 LIST_INSERT_HEAD(&ksyms_list, sc, sc_list); 401 mtx_unlock(&ksyms_mtx); 402 403 error = devfs_set_cdevpriv(sc, ksyms_cdevpriv_dtr); 404 if (error != 0) 405 goto failed; 406 407 /* 408 * MOD_SLOCK doesn't work here (because of a lock reversal with 409 * KLD_SLOCK). Therefore, simply try up to 3 times to get a "clean" 410 * snapshot of the kernel symbol table. This should work fine in the 411 * rare case of a kernel module being loaded/unloaded at the same 412 * time. 413 */ 414 for (try = 0; try < 3; try++) { 415 /* 416 * Map a buffer in the calling process memory space and 417 * create a snapshot of the kernel symbol table in it. 418 */ 419 420 /* Compute the size of buffer needed. */ 421 ksyms_size_calc(&ts); 422 total_elf_sz = sizeof(struct ksyms_hdr) + ts.ts_symsz + 423 ts.ts_strsz; 424 425 error = copyout_map(td, &sc->sc_uaddr, (vm_size_t)total_elf_sz); 426 if (error != 0) 427 break; 428 sc->sc_usize = total_elf_sz; 429 430 error = ksyms_snapshot(&ts, sc->sc_uaddr, total_elf_sz); 431 if (error == 0) 432 /* successful snapshot */ 433 return (0); 434 435 /* Snapshot failed, unmap the memory and try again. */ 436 (void)copyout_unmap(td, sc->sc_uaddr, sc->sc_usize); 437 } 438 439 failed: 440 ksyms_cdevpriv_dtr(sc); 441 return (error); 442 } 443 444 static int 445 ksyms_read(struct cdev *dev, struct uio *uio, int flags __unused) 446 { 447 struct ksyms_softc *sc; 448 char *buf; 449 off_t off; 450 size_t len, sz; 451 vm_size_t ubase; 452 int error; 453 454 error = devfs_get_cdevpriv((void **)&sc); 455 if (error != 0) 456 return (error); 457 458 off = uio->uio_offset; 459 len = uio->uio_resid; 460 461 if (off < 0 || off > sc->sc_usize) 462 return (EFAULT); 463 464 if (len > sc->sc_usize - off) 465 len = sc->sc_usize - off; 466 if (len == 0) 467 return (0); 468 469 /* 470 * Since the snapshot buffer is in the user space we have to copy it 471 * in to the kernel and then back out. The extra copy saves valuable 472 * kernel memory. 473 */ 474 buf = malloc(PAGE_SIZE, M_KSYMS, M_WAITOK); 475 ubase = sc->sc_uaddr + off; 476 477 while (len) { 478 sz = min(PAGE_SIZE, len); 479 if (copyin((void *)ubase, buf, sz) != 0) 480 error = EFAULT; 481 else 482 error = uiomove(buf, sz, uio); 483 if (error != 0) 484 break; 485 486 len -= sz; 487 ubase += sz; 488 } 489 free(buf, M_KSYMS); 490 491 return (error); 492 } 493 494 static int 495 ksyms_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int32_t flag __unused, 496 struct thread *td __unused) 497 { 498 struct ksyms_softc *sc; 499 int error; 500 501 error = devfs_get_cdevpriv((void **)&sc); 502 if (error != 0) 503 return (error); 504 505 switch (cmd) { 506 case KIOCGSIZE: 507 /* 508 * Return the size (in bytes) of the symbol table 509 * snapshot. 510 */ 511 *(size_t *)data = sc->sc_usize; 512 break; 513 case KIOCGADDR: 514 /* 515 * Return the address of the symbol table snapshot. 516 * XXX - compat32 version of this? 517 */ 518 *(void **)data = (void *)sc->sc_uaddr; 519 break; 520 default: 521 error = ENOTTY; 522 break; 523 } 524 525 return (error); 526 } 527 528 static int 529 ksyms_mmap(struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr, 530 int prot __unused, vm_memattr_t *memattr __unused) 531 { 532 struct ksyms_softc *sc; 533 int error; 534 535 error = devfs_get_cdevpriv((void **)&sc); 536 if (error != 0) 537 return (error); 538 539 /* 540 * XXX mmap() will actually map the symbol table into the process 541 * address space again. 542 */ 543 if (offset > round_page(sc->sc_usize) || 544 (*paddr = pmap_extract(sc->sc_pmap, 545 (vm_offset_t)sc->sc_uaddr + offset)) == 0) 546 return (-1); 547 548 return (0); 549 } 550 551 static int 552 ksyms_close(struct cdev *dev, int flags __unused, int fmt __unused, 553 struct thread *td) 554 { 555 struct ksyms_softc *sc; 556 int error; 557 558 error = devfs_get_cdevpriv((void **)&sc); 559 if (error != 0) 560 return (error); 561 562 /* Unmap the buffer from the process address space. */ 563 return (copyout_unmap(td, sc->sc_uaddr, sc->sc_usize)); 564 } 565 566 static int 567 ksyms_modevent(module_t mod __unused, int type, void *data __unused) 568 { 569 int error; 570 571 error = 0; 572 switch (type) { 573 case MOD_LOAD: 574 mtx_init(&ksyms_mtx, "KSyms mtx", NULL, MTX_DEF); 575 ksyms_dev = make_dev(&ksyms_cdevsw, 0, UID_ROOT, GID_WHEEL, 576 0400, KSYMS_DNAME); 577 break; 578 case MOD_UNLOAD: 579 if (!LIST_EMPTY(&ksyms_list)) 580 return (EBUSY); 581 destroy_dev(ksyms_dev); 582 mtx_destroy(&ksyms_mtx); 583 break; 584 case MOD_SHUTDOWN: 585 break; 586 default: 587 error = EOPNOTSUPP; 588 break; 589 } 590 return (error); 591 } 592 593 DEV_MODULE(ksyms, ksyms_modevent, NULL); 594 MODULE_VERSION(ksyms, 1); 595