1 /*- 2 * Copyright (c) 1989, 1992, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software developed by the Computer Systems 6 * Engineering group at Lawrence Berkeley Laboratory under DARPA contract 7 * BG 91-66 and contributed to Berkeley. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 34 #include <sys/cdefs.h> 35 __FBSDID("$FreeBSD$"); 36 37 #include <sys/param.h> 38 #include <sys/fnv_hash.h> 39 40 #define _WANT_VNET 41 42 #include <sys/user.h> 43 #include <sys/linker.h> 44 #include <sys/pcpu.h> 45 #include <sys/stat.h> 46 47 #include <net/vnet.h> 48 49 #include <assert.h> 50 #include <fcntl.h> 51 #include <kvm.h> 52 #include <limits.h> 53 #include <paths.h> 54 #include <stdint.h> 55 #include <stdio.h> 56 #include <stdlib.h> 57 #include <string.h> 58 #include <unistd.h> 59 #include <stdarg.h> 60 61 #include "kvm_private.h" 62 63 /* 64 * Routines private to libkvm. 65 */ 66 67 /* from src/lib/libc/gen/nlist.c */ 68 int __fdnlist(int, struct nlist *); 69 70 /* 71 * Report an error using printf style arguments. "program" is kd->program 72 * on hard errors, and 0 on soft errors, so that under sun error emulation, 73 * only hard errors are printed out (otherwise, programs like gdb will 74 * generate tons of error messages when trying to access bogus pointers). 75 */ 76 void 77 _kvm_err(kvm_t *kd, const char *program, const char *fmt, ...) 78 { 79 va_list ap; 80 81 va_start(ap, fmt); 82 if (program != NULL) { 83 (void)fprintf(stderr, "%s: ", program); 84 (void)vfprintf(stderr, fmt, ap); 85 (void)fputc('\n', stderr); 86 } else 87 (void)vsnprintf(kd->errbuf, 88 sizeof(kd->errbuf), fmt, ap); 89 90 va_end(ap); 91 } 92 93 void 94 _kvm_syserr(kvm_t *kd, const char *program, const char *fmt, ...) 95 { 96 va_list ap; 97 int n; 98 99 va_start(ap, fmt); 100 if (program != NULL) { 101 (void)fprintf(stderr, "%s: ", program); 102 (void)vfprintf(stderr, fmt, ap); 103 (void)fprintf(stderr, ": %s\n", strerror(errno)); 104 } else { 105 char *cp = kd->errbuf; 106 107 (void)vsnprintf(cp, sizeof(kd->errbuf), fmt, ap); 108 n = strlen(cp); 109 (void)snprintf(&cp[n], sizeof(kd->errbuf) - n, ": %s", 110 strerror(errno)); 111 } 112 va_end(ap); 113 } 114 115 void * 116 _kvm_malloc(kvm_t *kd, size_t n) 117 { 118 void *p; 119 120 if ((p = calloc(n, sizeof(char))) == NULL) 121 _kvm_err(kd, kd->program, "can't allocate %zu bytes: %s", 122 n, strerror(errno)); 123 return (p); 124 } 125 126 int 127 _kvm_probe_elf_kernel(kvm_t *kd, int class, int machine) 128 { 129 130 return (kd->nlehdr.e_ident[EI_CLASS] == class && 131 kd->nlehdr.e_type == ET_EXEC && 132 kd->nlehdr.e_machine == machine); 133 } 134 135 int 136 _kvm_is_minidump(kvm_t *kd) 137 { 138 char minihdr[8]; 139 140 if (kd->rawdump) 141 return (0); 142 if (pread(kd->pmfd, &minihdr, 8, 0) == 8 && 143 memcmp(&minihdr, "minidump", 8) == 0) 144 return (1); 145 return (0); 146 } 147 148 /* 149 * The powerpc backend has a hack to strip a leading kerneldump 150 * header from the core before treating it as an ELF header. 151 * 152 * We can add that here if we can get a change to libelf to support 153 * an initial offset into the file. Alternatively we could patch 154 * savecore to extract cores from a regular file instead. 155 */ 156 int 157 _kvm_read_core_phdrs(kvm_t *kd, size_t *phnump, GElf_Phdr **phdrp) 158 { 159 GElf_Ehdr ehdr; 160 GElf_Phdr *phdr; 161 Elf *elf; 162 size_t i, phnum; 163 164 elf = elf_begin(kd->pmfd, ELF_C_READ, NULL); 165 if (elf == NULL) { 166 _kvm_err(kd, kd->program, "%s", elf_errmsg(0)); 167 return (-1); 168 } 169 if (elf_kind(elf) != ELF_K_ELF) { 170 _kvm_err(kd, kd->program, "invalid core"); 171 goto bad; 172 } 173 if (gelf_getclass(elf) != kd->nlehdr.e_ident[EI_CLASS]) { 174 _kvm_err(kd, kd->program, "invalid core"); 175 goto bad; 176 } 177 if (gelf_getehdr(elf, &ehdr) == NULL) { 178 _kvm_err(kd, kd->program, "%s", elf_errmsg(0)); 179 goto bad; 180 } 181 if (ehdr.e_type != ET_CORE) { 182 _kvm_err(kd, kd->program, "invalid core"); 183 goto bad; 184 } 185 if (ehdr.e_machine != kd->nlehdr.e_machine) { 186 _kvm_err(kd, kd->program, "invalid core"); 187 goto bad; 188 } 189 190 if (elf_getphdrnum(elf, &phnum) == -1) { 191 _kvm_err(kd, kd->program, "%s", elf_errmsg(0)); 192 goto bad; 193 } 194 195 phdr = calloc(phnum, sizeof(*phdr)); 196 if (phdr == NULL) { 197 _kvm_err(kd, kd->program, "failed to allocate phdrs"); 198 goto bad; 199 } 200 201 for (i = 0; i < phnum; i++) { 202 if (gelf_getphdr(elf, i, &phdr[i]) == NULL) { 203 free(phdr); 204 _kvm_err(kd, kd->program, "%s", elf_errmsg(0)); 205 goto bad; 206 } 207 } 208 elf_end(elf); 209 *phnump = phnum; 210 *phdrp = phdr; 211 return (0); 212 213 bad: 214 elf_end(elf); 215 return (-1); 216 } 217 218 /* 219 * Transform v such that only bits [bit0, bitN) may be set. Generates a 220 * bitmask covering the number of bits, then shifts so +bit0+ is the first. 221 */ 222 static uint64_t 223 bitmask_range(uint64_t v, uint64_t bit0, uint64_t bitN) 224 { 225 if (bit0 == 0 && bitN == BITS_IN(v)) 226 return (v); 227 228 return (v & (((1ULL << (bitN - bit0)) - 1ULL) << bit0)); 229 } 230 231 /* 232 * Returns the number of bits in a given byte array range starting at a 233 * given base, from bit0 to bitN. bit0 may be non-zero in the case of 234 * counting backwards from bitN. 235 */ 236 static uint64_t 237 popcount_bytes(uint64_t *addr, uint32_t bit0, uint32_t bitN) 238 { 239 uint32_t res = bitN - bit0; 240 uint64_t count = 0; 241 uint32_t bound; 242 243 /* Align to 64-bit boundary on the left side if needed. */ 244 if ((bit0 % BITS_IN(*addr)) != 0) { 245 bound = MIN(bitN, roundup2(bit0, BITS_IN(*addr))); 246 count += __bitcount64(bitmask_range(*addr, bit0, bound)); 247 res -= (bound - bit0); 248 addr++; 249 } 250 251 while (res > 0) { 252 bound = MIN(res, BITS_IN(*addr)); 253 count += __bitcount64(bitmask_range(*addr, 0, bound)); 254 res -= bound; 255 addr++; 256 } 257 258 return (count); 259 } 260 261 int 262 _kvm_pt_init(kvm_t *kd, size_t map_len, off_t map_off, off_t sparse_off, 263 int page_size, int word_size) 264 { 265 uint64_t *addr; 266 uint32_t *popcount_bin; 267 int bin_popcounts = 0; 268 uint64_t pc_bins, res; 269 ssize_t rd; 270 271 /* 272 * Map the bitmap specified by the arguments. 273 */ 274 kd->pt_map = _kvm_malloc(kd, map_len); 275 if (kd->pt_map == NULL) { 276 _kvm_err(kd, kd->program, "cannot allocate %zu bytes for bitmap", 277 map_len); 278 return (-1); 279 } 280 rd = pread(kd->pmfd, kd->pt_map, map_len, map_off); 281 if (rd < 0 || rd != (ssize_t)map_len) { 282 _kvm_err(kd, kd->program, "cannot read %zu bytes for bitmap", 283 map_len); 284 return (-1); 285 } 286 kd->pt_map_size = map_len; 287 288 /* 289 * Generate a popcount cache for every POPCOUNT_BITS in the bitmap, 290 * so lookups only have to calculate the number of bits set between 291 * a cache point and their bit. This reduces lookups to O(1), 292 * without significantly increasing memory requirements. 293 * 294 * Round up the number of bins so that 'upper half' lookups work for 295 * the final bin, if needed. The first popcount is 0, since no bits 296 * precede bit 0, so add 1 for that also. Without this, extra work 297 * would be needed to handle the first PTEs in _kvm_pt_find(). 298 */ 299 addr = kd->pt_map; 300 res = map_len; 301 pc_bins = 1 + (res * NBBY + POPCOUNT_BITS / 2) / POPCOUNT_BITS; 302 kd->pt_popcounts = calloc(pc_bins, sizeof(uint32_t)); 303 if (kd->pt_popcounts == NULL) 304 return (-1); 305 306 for (popcount_bin = &kd->pt_popcounts[1]; res > 0; 307 addr++, res -= sizeof(*addr)) { 308 *popcount_bin += popcount_bytes(addr, 0, 309 MIN(res * NBBY, BITS_IN(*addr))); 310 if (++bin_popcounts == POPCOUNTS_IN(*addr)) { 311 popcount_bin++; 312 *popcount_bin = *(popcount_bin - 1); 313 bin_popcounts = 0; 314 } 315 } 316 317 assert(pc_bins * sizeof(*popcount_bin) == 318 ((uintptr_t)popcount_bin - (uintptr_t)kd->pt_popcounts)); 319 320 kd->pt_sparse_off = sparse_off; 321 kd->pt_sparse_size = (uint64_t)*popcount_bin * PAGE_SIZE; 322 kd->pt_page_size = page_size; 323 kd->pt_word_size = word_size; 324 return (0); 325 } 326 327 /* 328 * Find the offset for the given physical page address; returns -1 otherwise. 329 * 330 * A page's offset is represented by the sparse page base offset plus the 331 * number of bits set before its bit multiplied by PAGE_SIZE. This means 332 * that if a page exists in the dump, it's necessary to know how many pages 333 * in the dump precede it. Reduce this O(n) counting to O(1) by caching the 334 * number of bits set at POPCOUNT_BITS intervals. 335 * 336 * Then to find the number of pages before the requested address, simply 337 * index into the cache and count the number of bits set between that cache 338 * bin and the page's bit. Halve the number of bytes that have to be 339 * checked by also counting down from the next higher bin if it's closer. 340 */ 341 off_t 342 _kvm_pt_find(kvm_t *kd, uint64_t pa) 343 { 344 uint64_t *bitmap = kd->pt_map; 345 uint64_t pte_bit_id = pa / PAGE_SIZE; 346 uint64_t pte_u64 = pte_bit_id / BITS_IN(*bitmap); 347 uint64_t popcount_id = pte_bit_id / POPCOUNT_BITS; 348 uint64_t pte_mask = 1ULL << (pte_bit_id % BITS_IN(*bitmap)); 349 uint64_t bitN; 350 uint32_t count; 351 352 /* Check whether the page address requested is in the dump. */ 353 if (pte_bit_id >= (kd->pt_map_size * NBBY) || 354 (bitmap[pte_u64] & pte_mask) == 0) 355 return (-1); 356 357 /* 358 * Add/sub popcounts from the bitmap until the PTE's bit is reached. 359 * For bits that are in the upper half between the calculated 360 * popcount id and the next one, use the next one and subtract to 361 * minimize the number of popcounts required. 362 */ 363 if ((pte_bit_id % POPCOUNT_BITS) < (POPCOUNT_BITS / 2)) { 364 count = kd->pt_popcounts[popcount_id] + popcount_bytes( 365 bitmap + popcount_id * POPCOUNTS_IN(*bitmap), 366 0, pte_bit_id - popcount_id * POPCOUNT_BITS); 367 } else { 368 /* 369 * Counting in reverse is trickier, since we must avoid 370 * reading from bytes that are not in range, and invert. 371 */ 372 uint64_t pte_u64_bit_off = pte_u64 * BITS_IN(*bitmap); 373 374 popcount_id++; 375 bitN = MIN(popcount_id * POPCOUNT_BITS, 376 kd->pt_map_size * BITS_IN(uint8_t)); 377 count = kd->pt_popcounts[popcount_id] - popcount_bytes( 378 bitmap + pte_u64, 379 pte_bit_id - pte_u64_bit_off, bitN - pte_u64_bit_off); 380 } 381 382 /* 383 * This can only happen if the core is truncated. Treat these 384 * entries as if they don't exist, since their backing doesn't. 385 */ 386 if (count >= (kd->pt_sparse_size / PAGE_SIZE)) 387 return (-1); 388 389 return (kd->pt_sparse_off + (uint64_t)count * PAGE_SIZE); 390 } 391 392 static int 393 kvm_fdnlist(kvm_t *kd, struct kvm_nlist *list) 394 { 395 kvaddr_t addr; 396 int error, nfail; 397 398 if (kd->resolve_symbol == NULL) { 399 struct nlist *nl; 400 int count, i; 401 402 for (count = 0; list[count].n_name != NULL && 403 list[count].n_name[0] != '\0'; count++) 404 ; 405 nl = calloc(count + 1, sizeof(*nl)); 406 for (i = 0; i < count; i++) 407 nl[i].n_name = list[i].n_name; 408 nfail = __fdnlist(kd->nlfd, nl); 409 for (i = 0; i < count; i++) { 410 list[i].n_type = nl[i].n_type; 411 list[i].n_value = nl[i].n_value; 412 } 413 free(nl); 414 return (nfail); 415 } 416 417 nfail = 0; 418 while (list->n_name != NULL && list->n_name[0] != '\0') { 419 error = kd->resolve_symbol(list->n_name, &addr); 420 if (error != 0) { 421 nfail++; 422 list->n_value = 0; 423 list->n_type = 0; 424 } else { 425 list->n_value = addr; 426 list->n_type = N_DATA | N_EXT; 427 } 428 list++; 429 } 430 return (nfail); 431 } 432 433 /* 434 * Walk the list of unresolved symbols, generate a new list and prefix the 435 * symbol names, try again, and merge back what we could resolve. 436 */ 437 static int 438 kvm_fdnlist_prefix(kvm_t *kd, struct kvm_nlist *nl, int missing, 439 const char *prefix, kvaddr_t (*validate_fn)(kvm_t *, kvaddr_t)) 440 { 441 struct kvm_nlist *n, *np, *p; 442 char *cp, *ce; 443 const char *ccp; 444 size_t len; 445 int slen, unresolved; 446 447 /* 448 * Calculate the space we need to malloc for nlist and names. 449 * We are going to store the name twice for later lookups: once 450 * with the prefix and once the unmodified name delmited by \0. 451 */ 452 len = 0; 453 unresolved = 0; 454 for (p = nl; p->n_name && p->n_name[0]; ++p) { 455 if (p->n_type != N_UNDF) 456 continue; 457 len += sizeof(struct kvm_nlist) + strlen(prefix) + 458 2 * (strlen(p->n_name) + 1); 459 unresolved++; 460 } 461 if (unresolved == 0) 462 return (unresolved); 463 /* Add space for the terminating nlist entry. */ 464 len += sizeof(struct kvm_nlist); 465 unresolved++; 466 467 /* Alloc one chunk for (nlist, [names]) and setup pointers. */ 468 n = np = malloc(len); 469 bzero(n, len); 470 if (n == NULL) 471 return (missing); 472 cp = ce = (char *)np; 473 cp += unresolved * sizeof(struct kvm_nlist); 474 ce += len; 475 476 /* Generate shortened nlist with special prefix. */ 477 unresolved = 0; 478 for (p = nl; p->n_name && p->n_name[0]; ++p) { 479 if (p->n_type != N_UNDF) 480 continue; 481 *np = *p; 482 /* Save the new\0orig. name so we can later match it again. */ 483 slen = snprintf(cp, ce - cp, "%s%s%c%s", prefix, 484 (prefix[0] != '\0' && p->n_name[0] == '_') ? 485 (p->n_name + 1) : p->n_name, '\0', p->n_name); 486 if (slen < 0 || slen >= ce - cp) 487 continue; 488 np->n_name = cp; 489 cp += slen + 1; 490 np++; 491 unresolved++; 492 } 493 494 /* Do lookup on the reduced list. */ 495 np = n; 496 unresolved = kvm_fdnlist(kd, np); 497 498 /* Check if we could resolve further symbols and update the list. */ 499 if (unresolved >= 0 && unresolved < missing) { 500 /* Find the first freshly resolved entry. */ 501 for (; np->n_name && np->n_name[0]; np++) 502 if (np->n_type != N_UNDF) 503 break; 504 /* 505 * The lists are both in the same order, 506 * so we can walk them in parallel. 507 */ 508 for (p = nl; np->n_name && np->n_name[0] && 509 p->n_name && p->n_name[0]; ++p) { 510 if (p->n_type != N_UNDF) 511 continue; 512 /* Skip expanded name and compare to orig. one. */ 513 ccp = np->n_name + strlen(np->n_name) + 1; 514 if (strcmp(ccp, p->n_name) != 0) 515 continue; 516 /* Update nlist with new, translated results. */ 517 p->n_type = np->n_type; 518 if (validate_fn) 519 p->n_value = (*validate_fn)(kd, np->n_value); 520 else 521 p->n_value = np->n_value; 522 missing--; 523 /* Find next freshly resolved entry. */ 524 for (np++; np->n_name && np->n_name[0]; np++) 525 if (np->n_type != N_UNDF) 526 break; 527 } 528 } 529 /* We could assert missing = unresolved here. */ 530 531 free(n); 532 return (unresolved); 533 } 534 535 int 536 _kvm_nlist(kvm_t *kd, struct kvm_nlist *nl, int initialize) 537 { 538 struct kvm_nlist *p; 539 int nvalid; 540 struct kld_sym_lookup lookup; 541 int error; 542 const char *prefix = ""; 543 char symname[1024]; /* XXX-BZ symbol name length limit? */ 544 int tried_vnet, tried_dpcpu; 545 546 /* 547 * If we can't use the kld symbol lookup, revert to the 548 * slow library call. 549 */ 550 if (!ISALIVE(kd)) { 551 error = kvm_fdnlist(kd, nl); 552 if (error <= 0) /* Hard error or success. */ 553 return (error); 554 555 if (_kvm_vnet_initialized(kd, initialize)) 556 error = kvm_fdnlist_prefix(kd, nl, error, 557 VNET_SYMPREFIX, _kvm_vnet_validaddr); 558 559 if (error > 0 && _kvm_dpcpu_initialized(kd, initialize)) 560 error = kvm_fdnlist_prefix(kd, nl, error, 561 DPCPU_SYMPREFIX, _kvm_dpcpu_validaddr); 562 563 return (error); 564 } 565 566 /* 567 * We can use the kld lookup syscall. Go through each nlist entry 568 * and look it up with a kldsym(2) syscall. 569 */ 570 nvalid = 0; 571 tried_vnet = 0; 572 tried_dpcpu = 0; 573 again: 574 for (p = nl; p->n_name && p->n_name[0]; ++p) { 575 if (p->n_type != N_UNDF) 576 continue; 577 578 lookup.version = sizeof(lookup); 579 lookup.symvalue = 0; 580 lookup.symsize = 0; 581 582 error = snprintf(symname, sizeof(symname), "%s%s", prefix, 583 (prefix[0] != '\0' && p->n_name[0] == '_') ? 584 (p->n_name + 1) : p->n_name); 585 if (error < 0 || error >= (int)sizeof(symname)) 586 continue; 587 lookup.symname = symname; 588 if (lookup.symname[0] == '_') 589 lookup.symname++; 590 591 if (kldsym(0, KLDSYM_LOOKUP, &lookup) != -1) { 592 p->n_type = N_TEXT; 593 if (_kvm_vnet_initialized(kd, initialize) && 594 strcmp(prefix, VNET_SYMPREFIX) == 0) 595 p->n_value = 596 _kvm_vnet_validaddr(kd, lookup.symvalue); 597 else if (_kvm_dpcpu_initialized(kd, initialize) && 598 strcmp(prefix, DPCPU_SYMPREFIX) == 0) 599 p->n_value = 600 _kvm_dpcpu_validaddr(kd, lookup.symvalue); 601 else 602 p->n_value = lookup.symvalue; 603 ++nvalid; 604 /* lookup.symsize */ 605 } 606 } 607 608 /* 609 * Check the number of entries that weren't found. If they exist, 610 * try again with a prefix for virtualized or DPCPU symbol names. 611 */ 612 error = ((p - nl) - nvalid); 613 if (error && _kvm_vnet_initialized(kd, initialize) && !tried_vnet) { 614 tried_vnet = 1; 615 prefix = VNET_SYMPREFIX; 616 goto again; 617 } 618 if (error && _kvm_dpcpu_initialized(kd, initialize) && !tried_dpcpu) { 619 tried_dpcpu = 1; 620 prefix = DPCPU_SYMPREFIX; 621 goto again; 622 } 623 624 /* 625 * Return the number of entries that weren't found. If they exist, 626 * also fill internal error buffer. 627 */ 628 error = ((p - nl) - nvalid); 629 if (error) 630 _kvm_syserr(kd, kd->program, "kvm_nlist"); 631 return (error); 632 } 633