1 /*- 2 * Copyright (c) 2008-2009, Stacey Son <sson@freebsd.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * $FreeBSD$ 27 */ 28 29 #include <sys/param.h> 30 #include <sys/systm.h> 31 #include <sys/kernel.h> 32 33 #include <sys/conf.h> 34 #include <sys/elf.h> 35 #include <sys/ksyms.h> 36 #include <sys/linker.h> 37 #include <sys/malloc.h> 38 #include <sys/mman.h> 39 #include <sys/module.h> 40 #include <sys/mutex.h> 41 #include <sys/proc.h> 42 #include <sys/queue.h> 43 #include <sys/resourcevar.h> 44 #include <sys/stat.h> 45 #include <sys/uio.h> 46 47 #include <machine/elf.h> 48 49 #include <vm/pmap.h> 50 #include <vm/vm.h> 51 #include <vm/vm_extern.h> 52 #include <vm/vm_map.h> 53 54 #include "linker_if.h" 55 56 #define SHDR_NULL 0 57 #define SHDR_SYMTAB 1 58 #define SHDR_STRTAB 2 59 #define SHDR_SHSTRTAB 3 60 61 #define SHDR_NUM 4 62 63 #define STR_SYMTAB ".symtab" 64 #define STR_STRTAB ".strtab" 65 #define STR_SHSTRTAB ".shstrtab" 66 67 #define KSYMS_DNAME "ksyms" 68 69 static d_open_t ksyms_open; 70 static d_read_t ksyms_read; 71 static d_close_t ksyms_close; 72 static d_ioctl_t ksyms_ioctl; 73 static d_mmap_t ksyms_mmap; 74 75 static struct cdevsw ksyms_cdevsw = { 76 .d_version = D_VERSION, 77 .d_flags = D_TRACKCLOSE, 78 .d_open = ksyms_open, 79 .d_close = ksyms_close, 80 .d_read = ksyms_read, 81 .d_ioctl = ksyms_ioctl, 82 .d_mmap = ksyms_mmap, 83 .d_name = KSYMS_DNAME 84 }; 85 86 struct ksyms_softc { 87 LIST_ENTRY(ksyms_softc) sc_list; 88 vm_offset_t sc_uaddr; 89 size_t sc_usize; 90 pmap_t sc_pmap; 91 struct proc *sc_proc; 92 }; 93 94 static struct mtx ksyms_mtx; 95 static struct cdev *ksyms_dev; 96 static LIST_HEAD(, ksyms_softc) ksyms_list = 97 LIST_HEAD_INITIALIZER(ksyms_list); 98 99 static const char ksyms_shstrtab[] = 100 "\0" STR_SYMTAB "\0" STR_STRTAB "\0" STR_SHSTRTAB "\0"; 101 102 struct ksyms_hdr { 103 Elf_Ehdr kh_ehdr; 104 Elf_Phdr kh_txtphdr; 105 Elf_Phdr kh_datphdr; 106 Elf_Shdr kh_shdr[SHDR_NUM]; 107 char kh_shstrtab[sizeof(ksyms_shstrtab)]; 108 }; 109 110 struct tsizes { 111 size_t ts_symsz; 112 size_t ts_strsz; 113 }; 114 115 struct toffsets { 116 vm_offset_t to_symoff; 117 vm_offset_t to_stroff; 118 unsigned to_stridx; 119 size_t to_resid; 120 }; 121 122 static MALLOC_DEFINE(M_KSYMS, "KSYMS", "Kernel Symbol Table"); 123 124 /* 125 * Get the symbol and string table sizes for a kernel module. Add it to the 126 * running total. 127 */ 128 static int 129 ksyms_size_permod(linker_file_t lf, void *arg) 130 { 131 struct tsizes *ts; 132 const Elf_Sym *symtab; 133 caddr_t strtab; 134 long syms; 135 136 ts = arg; 137 138 syms = LINKER_SYMTAB_GET(lf, &symtab); 139 ts->ts_symsz += syms * sizeof(Elf_Sym); 140 ts->ts_strsz += LINKER_STRTAB_GET(lf, &strtab); 141 142 return (0); 143 } 144 145 /* 146 * For kernel module get the symbol and string table sizes, returning the 147 * totals in *ts. 148 */ 149 static void 150 ksyms_size_calc(struct tsizes *ts) 151 { 152 ts->ts_symsz = 0; 153 ts->ts_strsz = 0; 154 155 (void) linker_file_foreach(ksyms_size_permod, ts); 156 } 157 158 #define KSYMS_EMIT(src, des, sz) do { \ 159 copyout(src, (void *)des, sz); \ 160 des += sz; \ 161 } while (0) 162 163 #define SYMBLKSZ 256 * sizeof (Elf_Sym) 164 165 /* 166 * For a kernel module, add the symbol and string tables into the 167 * snapshot buffer. Fix up the offsets in the tables. 168 */ 169 static int 170 ksyms_add(linker_file_t lf, void *arg) 171 { 172 struct toffsets *to; 173 const Elf_Sym *symtab; 174 Elf_Sym *symp; 175 caddr_t strtab; 176 long symsz; 177 size_t strsz, numsyms; 178 linker_symval_t symval; 179 char *buf; 180 int i, nsyms, len; 181 182 to = arg; 183 184 MOD_SLOCK; 185 numsyms = LINKER_SYMTAB_GET(lf, &symtab); 186 strsz = LINKER_STRTAB_GET(lf, &strtab); 187 symsz = numsyms * sizeof(Elf_Sym); 188 189 buf = malloc(SYMBLKSZ, M_KSYMS, M_WAITOK); 190 191 while (symsz > 0) { 192 len = min(SYMBLKSZ, symsz); 193 bcopy(symtab, buf, len); 194 195 /* 196 * Fix up symbol table for kernel modules: 197 * string offsets need adjusted 198 * symbol values made absolute 199 */ 200 symp = (Elf_Sym *) buf; 201 nsyms = len / sizeof (Elf_Sym); 202 for (i = 0; i < nsyms; i++) { 203 symp[i].st_name += to->to_stridx; 204 if (lf->id > 1 && LINKER_SYMBOL_VALUES(lf, 205 (c_linker_sym_t) &symtab[i], &symval) == 0) { 206 symp[i].st_value = (uintptr_t) symval.value; 207 } 208 } 209 210 if (len > to->to_resid) { 211 MOD_SUNLOCK; 212 free(buf, M_KSYMS); 213 return (ENXIO); 214 } else 215 to->to_resid -= len; 216 KSYMS_EMIT(buf, to->to_symoff, len); 217 218 symtab += nsyms; 219 symsz -= len; 220 } 221 free(buf, M_KSYMS); 222 MOD_SUNLOCK; 223 224 if (strsz > to->to_resid) 225 return (ENXIO); 226 else 227 to->to_resid -= strsz; 228 KSYMS_EMIT(strtab, to->to_stroff, strsz); 229 to->to_stridx += strsz; 230 231 return (0); 232 } 233 234 /* 235 * Create a single ELF symbol table for the kernel and kernel modules loaded 236 * at this time. Write this snapshot out in the process address space. Return 237 * 0 on success, otherwise error. 238 */ 239 static int 240 ksyms_snapshot(struct tsizes *ts, vm_offset_t uaddr, size_t resid) 241 { 242 243 struct ksyms_hdr *hdr; 244 struct toffsets to; 245 int error = 0; 246 247 /* Be kernel stack friendly */ 248 hdr = malloc(sizeof (*hdr), M_KSYMS, M_WAITOK|M_ZERO); 249 250 /* 251 * Create the ELF header. 252 */ 253 hdr->kh_ehdr.e_ident[EI_PAD] = 0; 254 hdr->kh_ehdr.e_ident[EI_MAG0] = ELFMAG0; 255 hdr->kh_ehdr.e_ident[EI_MAG1] = ELFMAG1; 256 hdr->kh_ehdr.e_ident[EI_MAG2] = ELFMAG2; 257 hdr->kh_ehdr.e_ident[EI_MAG3] = ELFMAG3; 258 hdr->kh_ehdr.e_ident[EI_DATA] = ELF_DATA; 259 hdr->kh_ehdr.e_ident[EI_OSABI] = ELFOSABI_FREEBSD; 260 hdr->kh_ehdr.e_ident[EI_CLASS] = ELF_CLASS; 261 hdr->kh_ehdr.e_ident[EI_VERSION] = EV_CURRENT; 262 hdr->kh_ehdr.e_ident[EI_ABIVERSION] = 0; 263 hdr->kh_ehdr.e_type = ET_EXEC; 264 hdr->kh_ehdr.e_machine = ELF_ARCH; 265 hdr->kh_ehdr.e_version = EV_CURRENT; 266 hdr->kh_ehdr.e_entry = 0; 267 hdr->kh_ehdr.e_phoff = offsetof(struct ksyms_hdr, kh_txtphdr); 268 hdr->kh_ehdr.e_shoff = offsetof(struct ksyms_hdr, kh_shdr); 269 hdr->kh_ehdr.e_flags = 0; 270 hdr->kh_ehdr.e_ehsize = sizeof(Elf_Ehdr); 271 hdr->kh_ehdr.e_phentsize = sizeof(Elf_Phdr); 272 hdr->kh_ehdr.e_phnum = 2; /* Text and Data */ 273 hdr->kh_ehdr.e_shentsize = sizeof(Elf_Shdr); 274 hdr->kh_ehdr.e_shnum = SHDR_NUM; 275 hdr->kh_ehdr.e_shstrndx = SHDR_SHSTRTAB; 276 277 /* 278 * Add both the text and data Program headers. 279 */ 280 hdr->kh_txtphdr.p_type = PT_LOAD; 281 /* XXX - is there a way to put the actual .text addr/size here? */ 282 hdr->kh_txtphdr.p_vaddr = 0; 283 hdr->kh_txtphdr.p_memsz = 0; 284 hdr->kh_txtphdr.p_flags = PF_R | PF_X; 285 286 hdr->kh_datphdr.p_type = PT_LOAD; 287 /* XXX - is there a way to put the actual .data addr/size here? */ 288 hdr->kh_datphdr.p_vaddr = 0; 289 hdr->kh_datphdr.p_memsz = 0; 290 hdr->kh_datphdr.p_flags = PF_R | PF_W | PF_X; 291 292 /* 293 * Add the Section headers: null, symtab, strtab, shstrtab, 294 */ 295 296 /* First section header - null */ 297 298 /* Second section header - symtab */ 299 hdr->kh_shdr[SHDR_SYMTAB].sh_name = 1; /* String offset (skip null) */ 300 hdr->kh_shdr[SHDR_SYMTAB].sh_type = SHT_SYMTAB; 301 hdr->kh_shdr[SHDR_SYMTAB].sh_flags = 0; 302 hdr->kh_shdr[SHDR_SYMTAB].sh_addr = 0; 303 hdr->kh_shdr[SHDR_SYMTAB].sh_offset = sizeof(*hdr); 304 hdr->kh_shdr[SHDR_SYMTAB].sh_size = ts->ts_symsz; 305 hdr->kh_shdr[SHDR_SYMTAB].sh_link = SHDR_STRTAB; 306 hdr->kh_shdr[SHDR_SYMTAB].sh_info = ts->ts_symsz / sizeof(Elf_Sym); 307 hdr->kh_shdr[SHDR_SYMTAB].sh_addralign = sizeof(long); 308 hdr->kh_shdr[SHDR_SYMTAB].sh_entsize = sizeof(Elf_Sym); 309 310 /* Third section header - strtab */ 311 hdr->kh_shdr[SHDR_STRTAB].sh_name = 1 + sizeof(STR_SYMTAB); 312 hdr->kh_shdr[SHDR_STRTAB].sh_type = SHT_STRTAB; 313 hdr->kh_shdr[SHDR_STRTAB].sh_flags = 0; 314 hdr->kh_shdr[SHDR_STRTAB].sh_addr = 0; 315 hdr->kh_shdr[SHDR_STRTAB].sh_offset = 316 hdr->kh_shdr[SHDR_SYMTAB].sh_offset + ts->ts_symsz; 317 hdr->kh_shdr[SHDR_STRTAB].sh_size = ts->ts_strsz; 318 hdr->kh_shdr[SHDR_STRTAB].sh_link = 0; 319 hdr->kh_shdr[SHDR_STRTAB].sh_info = 0; 320 hdr->kh_shdr[SHDR_STRTAB].sh_addralign = sizeof(char); 321 hdr->kh_shdr[SHDR_STRTAB].sh_entsize = 0; 322 323 /* Fourth section - shstrtab */ 324 hdr->kh_shdr[SHDR_SHSTRTAB].sh_name = 1 + sizeof(STR_SYMTAB) + 325 sizeof(STR_STRTAB); 326 hdr->kh_shdr[SHDR_SHSTRTAB].sh_type = SHT_STRTAB; 327 hdr->kh_shdr[SHDR_SHSTRTAB].sh_flags = 0; 328 hdr->kh_shdr[SHDR_SHSTRTAB].sh_addr = 0; 329 hdr->kh_shdr[SHDR_SHSTRTAB].sh_offset = 330 offsetof(struct ksyms_hdr, kh_shstrtab); 331 hdr->kh_shdr[SHDR_SHSTRTAB].sh_size = sizeof(ksyms_shstrtab); 332 hdr->kh_shdr[SHDR_SHSTRTAB].sh_link = 0; 333 hdr->kh_shdr[SHDR_SHSTRTAB].sh_info = 0; 334 hdr->kh_shdr[SHDR_SHSTRTAB].sh_addralign = 0 /* sizeof(char) */; 335 hdr->kh_shdr[SHDR_SHSTRTAB].sh_entsize = 0; 336 337 /* Copy shstrtab into the header */ 338 bcopy(ksyms_shstrtab, hdr->kh_shstrtab, sizeof(ksyms_shstrtab)); 339 340 to.to_symoff = uaddr + hdr->kh_shdr[SHDR_SYMTAB].sh_offset; 341 to.to_stroff = uaddr + hdr->kh_shdr[SHDR_STRTAB].sh_offset; 342 to.to_stridx = 0; 343 if (sizeof(struct ksyms_hdr) > resid) { 344 free(hdr, M_KSYMS); 345 return (ENXIO); 346 } 347 to.to_resid = resid - sizeof(struct ksyms_hdr); 348 349 /* Emit Header */ 350 copyout(hdr, (void *)uaddr, sizeof(struct ksyms_hdr)); 351 352 free(hdr, M_KSYMS); 353 354 /* Add symbol and string tables for each kernelmodule */ 355 error = linker_file_foreach(ksyms_add, &to); 356 357 if (to.to_resid != 0) 358 return (ENXIO); 359 360 return (error); 361 } 362 363 static void 364 ksyms_cdevpriv_dtr(void *data) 365 { 366 struct ksyms_softc *sc; 367 368 sc = (struct ksyms_softc *)data; 369 370 mtx_lock(&ksyms_mtx); 371 LIST_REMOVE(sc, sc_list); 372 mtx_unlock(&ksyms_mtx); 373 free(sc, M_KSYMS); 374 } 375 376 /* ARGSUSED */ 377 static int 378 ksyms_open(struct cdev *dev, int flags, int fmt __unused, struct thread *td) 379 { 380 struct tsizes ts; 381 size_t total_elf_sz; 382 int error, try; 383 struct ksyms_softc *sc; 384 385 /* 386 * Limit one open() per process. The process must close() 387 * before open()'ing again. 388 */ 389 mtx_lock(&ksyms_mtx); 390 LIST_FOREACH(sc, &ksyms_list, sc_list) { 391 if (sc->sc_proc == td->td_proc) { 392 mtx_unlock(&ksyms_mtx); 393 return (EBUSY); 394 } 395 } 396 397 sc = (struct ksyms_softc *) malloc(sizeof (*sc), M_KSYMS, 398 M_NOWAIT|M_ZERO); 399 400 if (sc == NULL) { 401 mtx_unlock(&ksyms_mtx); 402 return (ENOMEM); 403 } 404 sc->sc_proc = td->td_proc; 405 sc->sc_pmap = &td->td_proc->p_vmspace->vm_pmap; 406 LIST_INSERT_HEAD(&ksyms_list, sc, sc_list); 407 mtx_unlock(&ksyms_mtx); 408 409 error = devfs_set_cdevpriv(sc, ksyms_cdevpriv_dtr); 410 if (error) 411 goto failed; 412 413 /* 414 * MOD_SLOCK doesn't work here (because of a lock reversal with 415 * KLD_SLOCK). Therefore, simply try upto 3 times to get a "clean" 416 * snapshot of the kernel symbol table. This should work fine in the 417 * rare case of a kernel module being loaded/unloaded at the same 418 * time. 419 */ 420 for(try = 0; try < 3; try++) { 421 /* 422 * Map a buffer in the calling process memory space and 423 * create a snapshot of the kernel symbol table in it. 424 */ 425 426 /* Compute the size of buffer needed. */ 427 ksyms_size_calc(&ts); 428 total_elf_sz = sizeof(struct ksyms_hdr) + ts.ts_symsz + 429 ts.ts_strsz; 430 431 error = copyout_map(td, &(sc->sc_uaddr), 432 (vm_size_t) total_elf_sz); 433 if (error) 434 break; 435 sc->sc_usize = total_elf_sz; 436 437 error = ksyms_snapshot(&ts, sc->sc_uaddr, total_elf_sz); 438 if (!error) { 439 /* Successful Snapshot */ 440 return (0); 441 } 442 443 /* Snapshot failed, unmap the memory and try again */ 444 (void) copyout_unmap(td, sc->sc_uaddr, sc->sc_usize); 445 } 446 447 failed: 448 ksyms_cdevpriv_dtr(sc); 449 return (error); 450 } 451 452 /* ARGSUSED */ 453 static int 454 ksyms_read(struct cdev *dev, struct uio *uio, int flags __unused) 455 { 456 int error; 457 size_t len, sz; 458 struct ksyms_softc *sc; 459 off_t off; 460 char *buf; 461 vm_size_t ubase; 462 463 error = devfs_get_cdevpriv((void **)&sc); 464 if (error) 465 return (error); 466 467 off = uio->uio_offset; 468 len = uio->uio_resid; 469 470 if (off < 0 || off > sc->sc_usize) 471 return (EFAULT); 472 473 if (len > (sc->sc_usize - off)) 474 len = sc->sc_usize - off; 475 476 if (len == 0) 477 return (0); 478 479 /* 480 * Since the snapshot buffer is in the user space we have to copy it 481 * in to the kernel and then back out. The extra copy saves valuable 482 * kernel memory. 483 */ 484 buf = malloc(PAGE_SIZE, M_KSYMS, M_WAITOK); 485 ubase = sc->sc_uaddr + off; 486 487 while (len) { 488 489 sz = min(PAGE_SIZE, len); 490 if (copyin((void *)ubase, buf, sz)) 491 error = EFAULT; 492 else 493 error = uiomove(buf, sz, uio); 494 495 if (error) 496 break; 497 498 len -= sz; 499 ubase += sz; 500 } 501 free(buf, M_KSYMS); 502 503 return (error); 504 } 505 506 /* ARGSUSED */ 507 static int 508 ksyms_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int32_t flag __unused, 509 struct thread *td __unused) 510 { 511 int error = 0; 512 struct ksyms_softc *sc; 513 514 error = devfs_get_cdevpriv((void **)&sc); 515 if (error) 516 return (error); 517 518 switch (cmd) { 519 case KIOCGSIZE: 520 /* 521 * Return the size (in bytes) of the symbol table 522 * snapshot. 523 */ 524 *(size_t *)data = sc->sc_usize; 525 break; 526 527 case KIOCGADDR: 528 /* 529 * Return the address of the symbol table snapshot. 530 * XXX - compat32 version of this? 531 */ 532 *(void **)data = (void *)sc->sc_uaddr; 533 break; 534 535 default: 536 error = ENOTTY; 537 break; 538 } 539 540 return (error); 541 } 542 543 /* ARGUSED */ 544 static int 545 ksyms_mmap(struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr, 546 int prot __unused, vm_memattr_t *memattr __unused) 547 { 548 struct ksyms_softc *sc; 549 int error; 550 551 error = devfs_get_cdevpriv((void **)&sc); 552 if (error) 553 return (error); 554 555 /* 556 * XXX mmap() will actually map the symbol table into the process 557 * address space again. 558 */ 559 if (offset > round_page(sc->sc_usize) || 560 (*paddr = pmap_extract(sc->sc_pmap, 561 (vm_offset_t)sc->sc_uaddr + offset)) == 0) 562 return (-1); 563 564 return (0); 565 } 566 567 /* ARGUSED */ 568 static int 569 ksyms_close(struct cdev *dev, int flags __unused, int fmt __unused, 570 struct thread *td) 571 { 572 int error = 0; 573 struct ksyms_softc *sc; 574 575 error = devfs_get_cdevpriv((void **)&sc); 576 if (error) 577 return (error); 578 579 /* Unmap the buffer from the process address space. */ 580 error = copyout_unmap(td, sc->sc_uaddr, sc->sc_usize); 581 582 return (error); 583 } 584 585 /* ARGSUSED */ 586 static int 587 ksyms_modevent(module_t mod __unused, int type, void *data __unused) 588 { 589 int error = 0; 590 591 switch (type) { 592 case MOD_LOAD: 593 mtx_init(&ksyms_mtx, "KSyms mtx", NULL, MTX_DEF); 594 ksyms_dev = make_dev(&ksyms_cdevsw, 0, UID_ROOT, GID_WHEEL, 595 0444, KSYMS_DNAME); 596 break; 597 598 case MOD_UNLOAD: 599 if (!LIST_EMPTY(&ksyms_list)) 600 return (EBUSY); 601 destroy_dev(ksyms_dev); 602 mtx_destroy(&ksyms_mtx); 603 break; 604 605 case MOD_SHUTDOWN: 606 break; 607 608 default: 609 error = EOPNOTSUPP; 610 break; 611 } 612 return (error); 613 } 614 615 DEV_MODULE(ksyms, ksyms_modevent, NULL); 616 MODULE_VERSION(ksyms, 1); 617