1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * KVM backend for hypervisor domain dumps. We don't use libkvm for 28 * such dumps, since they do not have a namelist file or the typical 29 * dump structures we expect to aid bootstrapping. Instead, we 30 * bootstrap based upon a debug_info structure at a known VA, using the 31 * guest's own page tables to resolve to physical addresses, and 32 * construct the namelist in a manner similar to ksyms_snapshot(). 33 * 34 * Note that there are two formats understood by this module: the older, 35 * ad hoc format, which we call 'core' within this file, and an 36 * ELF-based format, known as 'elf'. 37 * 38 * We only support the older format generated on Solaris dom0: before we 39 * fixed it, core dump files were broken whenever a PFN didn't map a 40 * real MFN (!). 
41 */ 42 43 #include <strings.h> 44 #include <stdio.h> 45 #include <stdlib.h> 46 #include <stddef.h> 47 #include <stdarg.h> 48 #include <unistd.h> 49 #include <fcntl.h> 50 #include <gelf.h> 51 #include <errno.h> 52 53 #include <sys/mman.h> 54 #include <sys/stat.h> 55 #include <sys/debug_info.h> 56 #include <sys/xen_mmu.h> 57 #include <sys/elf.h> 58 #include <sys/machelf.h> 59 #include <sys/modctl.h> 60 #include <sys/kobj.h> 61 #include <sys/kobj_impl.h> 62 #include <sys/sysmacros.h> 63 #include <sys/privmregs.h> 64 #include <vm/as.h> 65 66 #include <mdb/mdb_io.h> 67 #include <mdb/mdb_kb.h> 68 #include <mdb/mdb_target_impl.h> 69 70 #include <xen/public/xen.h> 71 #include <xen/public/version.h> 72 #include <xen/public/elfnote.h> 73 74 #define XKB_SHDR_NULL 0 75 #define XKB_SHDR_SYMTAB 1 76 #define XKB_SHDR_STRTAB 2 77 #define XKB_SHDR_SHSTRTAB 3 78 #define XKB_SHDR_NUM 4 79 80 #define XKB_WALK_LOCAL 0x1 81 #define XKB_WALK_GLOBAL 0x2 82 #define XKB_WALK_STR 0x4 83 #define XKB_WALK_ALL (XKB_WALK_LOCAL | XKB_WALK_GLOBAL | XKB_WALK_STR) 84 85 #if defined(__i386) 86 #define DEBUG_INFO 0xf4bff000 87 #define DEBUG_INFO_HVM 0xfe7ff000 88 #elif defined(__amd64) 89 #define DEBUG_INFO 0xfffffffffb7ff000 90 #define DEBUG_INFO_HVM 0xfffffffffb7ff000 91 #endif 92 93 #define PAGE_SIZE 0x1000 94 #define PAGE_SHIFT 12 95 #define PAGE_OFFSET(a) ((a) & (PAGE_SIZE - 1)) 96 #define PAGE_MASK(a) ((a) & ~(PAGE_SIZE - 1)) 97 #define PAGE_ALIGNED(a) (((a) & (PAGE_SIZE -1)) == 0) 98 #define PT_PADDR_LGPG 0x000fffffffffe000ull 99 #define PT_PADDR 0x000ffffffffff000ull 100 #define PT_VALID 0x1 101 #define PT_PAGESIZE 0x080 102 #define PTE_IS_LGPG(p, l) ((l) > 0 && ((p) & PT_PAGESIZE)) 103 104 #define XC_CORE_MAGIC 0xF00FEBED 105 #define XC_CORE_MAGIC_HVM 0xF00FEBEE 106 107 #define VGCF_HVM_GUEST (1<<1) 108 109 typedef struct xc_core_header { 110 unsigned int xch_magic; 111 unsigned int xch_nr_vcpus; 112 unsigned int xch_nr_pages; 113 unsigned int xch_ctxt_offset; 114 unsigned int 
xch_index_offset; 115 unsigned int xch_pages_offset; 116 } xc_core_header_t; 117 118 struct xc_elf_header { 119 uint64_t xeh_magic; 120 uint64_t xeh_nr_vcpus; 121 uint64_t xeh_nr_pages; 122 uint64_t xeh_page_size; 123 }; 124 125 struct xc_elf_version { 126 uint64_t xev_major; 127 uint64_t xev_minor; 128 xen_extraversion_t xev_extra; 129 xen_compile_info_t xev_compile_info; 130 xen_capabilities_info_t xev_capabilities; 131 xen_changeset_info_t xev_changeset; 132 xen_platform_parameters_t xev_platform_parameters; 133 uint64_t xev_pagesize; 134 }; 135 136 /* 137 * Either an old-style (3.0.4) core format, or the ELF format. 138 */ 139 typedef enum { 140 XKB_FORMAT_UNKNOWN = 0, 141 XKB_FORMAT_CORE = 1, 142 XKB_FORMAT_ELF = 2 143 } xkb_type_t; 144 145 typedef struct mfn_map { 146 mfn_t mm_mfn; 147 char *mm_map; 148 } mfn_map_t; 149 150 typedef struct mmu_info { 151 size_t mi_max; 152 size_t mi_shift[4]; 153 size_t mi_ptes; 154 size_t mi_ptesize; 155 } mmu_info_t; 156 157 typedef struct xkb_core { 158 xc_core_header_t xc_hdr; 159 void *xc_p2m_buf; 160 } xkb_core_t; 161 162 typedef struct xkb_elf { 163 mdb_gelf_file_t *xe_gelf; 164 size_t *xe_off; 165 struct xc_elf_header xe_hdr; 166 struct xc_elf_version xe_version; 167 } xkb_elf_t; 168 169 typedef struct xkb { 170 char *xkb_path; 171 int xkb_fd; 172 int xkb_is_hvm; 173 174 xkb_type_t xkb_type; 175 xkb_core_t xkb_core; 176 xkb_elf_t xkb_elf; 177 178 size_t xkb_nr_vcpus; 179 size_t xkb_nr_pages; 180 size_t xkb_pages_off; 181 xen_pfn_t xkb_max_pfn; 182 mfn_t xkb_max_mfn; 183 int xkb_is_pae; 184 185 mmu_info_t xkb_mmu; 186 debug_info_t xkb_info; 187 188 void *xkb_vcpu_data; 189 size_t xkb_vcpu_data_sz; 190 struct vcpu_guest_context **xkb_vcpus; 191 192 char *xkb_pages; 193 mfn_t *xkb_p2m; 194 xen_pfn_t *xkb_m2p; 195 mfn_map_t xkb_pt_map[4]; 196 mfn_map_t xkb_map; 197 198 char *xkb_namelist; 199 size_t xkb_namesize; 200 } xkb_t; 201 202 static const char xkb_shstrtab[] = "\0.symtab\0.strtab\0.shstrtab\0"; 203 204 typedef 
struct xkb_namelist { 205 Ehdr kh_elf_hdr; 206 Phdr kh_text_phdr; 207 Phdr kh_data_phdr; 208 Shdr kh_shdr[XKB_SHDR_NUM]; 209 char shstrings[sizeof (xkb_shstrtab)]; 210 } xkb_namelist_t; 211 212 static int xkb_build_ksyms(xkb_t *); 213 static offset_t xkb_mfn_to_offset(xkb_t *, mfn_t); 214 static mfn_t xkb_va_to_mfn(xkb_t *, uintptr_t, mfn_t); 215 static ssize_t xkb_read(xkb_t *, uintptr_t, void *, size_t); 216 static int xkb_read_word(xkb_t *, uintptr_t, uintptr_t *); 217 static char *xkb_map_mfn(xkb_t *, mfn_t, mfn_map_t *); 218 static int xkb_close(xkb_t *); 219 220 /* 221 * Jump through the hoops we need to to correctly identify a core file 222 * of either the old or new format. 223 */ 224 int 225 xkb_identify(const char *file, int *longmode) 226 { 227 xc_core_header_t header; 228 mdb_gelf_file_t *gf = NULL; 229 mdb_gelf_sect_t *sect = NULL; 230 mdb_io_t *io = NULL; 231 char *notes = NULL; 232 char *pos; 233 int ret = 0; 234 size_t sz; 235 int fd; 236 237 if ((fd = open64(file, O_RDONLY)) == -1) 238 return (-1); 239 240 if (pread64(fd, &header, sizeof (header), 0) != sizeof (header)) { 241 (void) close(fd); 242 return (0); 243 } 244 245 (void) close(fd); 246 247 if (header.xch_magic == XC_CORE_MAGIC) { 248 *longmode = 0; 249 250 /* 251 * Indeed. 
252 */ 253 sz = header.xch_index_offset - header.xch_ctxt_offset; 254 #ifdef _LP64 255 if (sizeof (struct vcpu_guest_context) * 256 header.xch_nr_vcpus == sz) 257 *longmode = 1; 258 #else 259 if (sizeof (struct vcpu_guest_context) * 260 header.xch_nr_vcpus != sz) 261 *longmode = 1; 262 #endif /* _LP64 */ 263 264 return (1); 265 } 266 267 if ((io = mdb_fdio_create_path(NULL, file, O_RDONLY, 0)) == NULL) 268 return (-1); 269 270 if ((gf = mdb_gelf_create(io, ET_NONE, GF_FILE)) == NULL) 271 goto out; 272 273 if ((sect = mdb_gelf_sect_by_name(gf, ".note.Xen")) == NULL) 274 goto out; 275 276 if ((notes = mdb_gelf_sect_load(gf, sect)) == NULL) 277 goto out; 278 279 for (pos = notes; pos < notes + sect->gs_shdr.sh_size; ) { 280 struct xc_elf_version *vers; 281 /* LINTED - alignment */ 282 Elf64_Nhdr *nhdr = (Elf64_Nhdr *)pos; 283 char *desc; 284 char *name; 285 286 name = pos + sizeof (*nhdr); 287 desc = (char *)P2ROUNDUP((uintptr_t)name + nhdr->n_namesz, 4); 288 289 pos = desc + nhdr->n_descsz; 290 291 if (nhdr->n_type != XEN_ELFNOTE_DUMPCORE_XEN_VERSION) 292 continue; 293 294 /* 295 * The contents of this struct differ between 32 and 64 296 * bit; however, not until past the 'xev_capabilities' 297 * member, so we can just about get away with this. 298 */ 299 300 /* LINTED - alignment */ 301 vers = (struct xc_elf_version *)desc; 302 303 if (strstr(vers->xev_capabilities, "x86_64")) { 304 /* 305 * 64-bit hypervisor, but it can still be 306 * a 32-bit domain core. 32-bit domain cores 307 * are also dumped in Elf64 format, but they 308 * have e_machine set to EM_386, not EM_AMD64. 309 */ 310 if (gf->gf_ehdr.e_machine == EM_386) 311 *longmode = 0; 312 else 313 *longmode = 1; 314 } else if (strstr(vers->xev_capabilities, "x86_32") || 315 strstr(vers->xev_capabilities, "x86_32p")) { 316 /* 317 * 32-bit hypervisor, can only be a 32-bit core. 
318 */ 319 *longmode = 0; 320 } else { 321 mdb_warn("couldn't derive word size of dump; " 322 "assuming 64-bit"); 323 *longmode = 1; 324 } 325 } 326 327 ret = 1; 328 329 out: 330 if (gf != NULL) 331 mdb_gelf_destroy(gf); 332 else if (io != NULL) 333 mdb_io_destroy(io); 334 return (ret); 335 } 336 337 static void * 338 xkb_fail(xkb_t *xkb, const char *msg, ...) 339 { 340 va_list args; 341 342 va_start(args, msg); 343 if (xkb != NULL) 344 (void) fprintf(stderr, "%s: ", xkb->xkb_path); 345 (void) vfprintf(stderr, msg, args); 346 (void) fprintf(stderr, "\n"); 347 va_end(args); 348 if (xkb != NULL) 349 (void) xkb_close(xkb); 350 351 errno = ENOEXEC; 352 353 return (NULL); 354 } 355 356 static int 357 xkb_build_m2p(xkb_t *xkb) 358 { 359 size_t i; 360 361 for (i = 0; i <= xkb->xkb_max_pfn; i++) { 362 if (xkb->xkb_p2m[i] != MFN_INVALID && 363 xkb->xkb_p2m[i] > xkb->xkb_max_mfn) 364 xkb->xkb_max_mfn = xkb->xkb_p2m[i]; 365 } 366 367 xkb->xkb_m2p = mdb_alloc((xkb->xkb_max_mfn + 1) * sizeof (xen_pfn_t), 368 UM_SLEEP); 369 370 for (i = 0; i <= xkb->xkb_max_mfn; i++) 371 xkb->xkb_m2p[i] = PFN_INVALID; 372 373 for (i = 0; i <= xkb->xkb_max_pfn; i++) { 374 if (xkb->xkb_p2m[i] != MFN_INVALID) 375 xkb->xkb_m2p[xkb->xkb_p2m[i]] = i; 376 } 377 378 return (1); 379 } 380 381 /* 382 * With FORMAT_CORE, we can use the table in the dump file directly. 383 * Just to make things fun, they've not page-aligned the p2m table. 
384 */ 385 static int 386 xkb_map_p2m(xkb_t *xkb) 387 { 388 offset_t off; 389 size_t size; 390 xkb_core_t *xc = &xkb->xkb_core; 391 size_t count = xkb->xkb_nr_pages; 392 size_t boff = xc->xc_hdr.xch_index_offset; 393 394 size = (sizeof (mfn_t) * count) + (PAGE_SIZE * 2); 395 size = PAGE_MASK(size); 396 off = PAGE_MASK(boff); 397 398 /* LINTED - alignment */ 399 xc->xc_p2m_buf = (mfn_t *)mmap(NULL, size, PROT_READ, 400 MAP_SHARED, xkb->xkb_fd, off); 401 402 if (xc->xc_p2m_buf == (xen_pfn_t *)MAP_FAILED) { 403 (void) xkb_fail(xkb, "cannot map p2m table"); 404 return (0); 405 } 406 407 /* LINTED - alignment */ 408 xkb->xkb_p2m = (mfn_t *)((char *)xc->xc_p2m_buf + 409 PAGE_OFFSET(boff)); 410 411 return (1); 412 } 413 414 /* 415 * With FORMAT_ELF, we have a set of <pfn,mfn> pairs, which we convert 416 * into a linear array indexed by pfn for convenience. We also need to 417 * track the mapping between mfn and the offset in the file: a pfn with 418 * no mfn will not appear in the core file. 419 */ 420 static int 421 xkb_build_p2m(xkb_t *xkb) 422 { 423 xkb_elf_t *xe = &xkb->xkb_elf; 424 mdb_gelf_sect_t *sect; 425 size_t size; 426 size_t i; 427 428 struct elf_p2m { 429 uint64_t pfn; 430 uint64_t gmfn; 431 } *p2m; 432 433 sect = mdb_gelf_sect_by_name(xe->xe_gelf, ".xen_p2m"); 434 435 if (sect == NULL) { 436 (void) xkb_fail(xkb, "cannot find section .xen_p2m"); 437 return (0); 438 } 439 440 if ((p2m = mdb_gelf_sect_load(xe->xe_gelf, sect)) == NULL) { 441 (void) xkb_fail(xkb, "couldn't read .xen_p2m"); 442 return (0); 443 } 444 445 for (i = 0; i < xkb->xkb_nr_pages; i++) { 446 if (p2m[i].pfn > xkb->xkb_max_pfn) 447 xkb->xkb_max_pfn = p2m[i].pfn; 448 } 449 450 size = sizeof (xen_pfn_t) * (xkb->xkb_max_pfn + 1); 451 xkb->xkb_p2m = mdb_alloc(size, UM_SLEEP); 452 size = sizeof (size_t) * (xkb->xkb_max_pfn + 1); 453 xe->xe_off = mdb_alloc(size, UM_SLEEP); 454 455 for (i = 0; i <= xkb->xkb_max_pfn; i++) { 456 xkb->xkb_p2m[i] = PFN_INVALID; 457 xe->xe_off[i] = (size_t)-1; 458 } 459 
460 for (i = 0; i < xkb->xkb_nr_pages; i++) { 461 xkb->xkb_p2m[p2m[i].pfn] = p2m[i].gmfn; 462 xe->xe_off[p2m[i].pfn] = i; 463 } 464 465 return (1); 466 } 467 468 /* 469 * For HVM images, we don't have the corresponding MFN list; the table 470 * is just a mapping from page index in the dump to the corresponding 471 * PFN. To simplify the other code, we'll pretend that these PFNs are 472 * really MFNs as well, by populating xkb_p2m. 473 */ 474 static int 475 xkb_build_fake_p2m(xkb_t *xkb) 476 { 477 xkb_elf_t *xe = &xkb->xkb_elf; 478 mdb_gelf_sect_t *sect; 479 size_t size; 480 size_t i; 481 482 uint64_t *p2pfn; 483 484 sect = mdb_gelf_sect_by_name(xe->xe_gelf, ".xen_pfn"); 485 486 if (sect == NULL) { 487 (void) xkb_fail(xkb, "cannot find section .xen_pfn"); 488 return (0); 489 } 490 491 if ((p2pfn = mdb_gelf_sect_load(xe->xe_gelf, sect)) == NULL) { 492 (void) xkb_fail(xkb, "couldn't read .xen_pfn"); 493 return (0); 494 } 495 496 for (i = 0; i < xkb->xkb_nr_pages; i++) { 497 if (p2pfn[i] != PFN_INVALID && p2pfn[i] > xkb->xkb_max_pfn) 498 xkb->xkb_max_pfn = p2pfn[i]; 499 } 500 501 size = sizeof (xen_pfn_t) * (xkb->xkb_max_pfn + 1); 502 xkb->xkb_p2m = mdb_alloc(size, UM_SLEEP); 503 504 size = sizeof (size_t) * (xkb->xkb_max_pfn + 1); 505 xe->xe_off = mdb_alloc(size, UM_SLEEP); 506 507 for (i = 0; i <= xkb->xkb_max_pfn; i++) { 508 xkb->xkb_p2m[i] = PFN_INVALID; 509 xe->xe_off[i] = (size_t)-1; 510 } 511 512 for (i = 0; i < xkb->xkb_nr_pages; i++) { 513 if (p2pfn[i] == PFN_INVALID) 514 continue; 515 xkb->xkb_p2m[p2pfn[i]] = p2pfn[i]; 516 xe->xe_off[p2pfn[i]] = i; 517 } 518 519 return (1); 520 } 521 522 /* 523 * Return the MFN of the top-level page table for the given as. 
524 */ 525 static mfn_t 526 xkb_as_to_mfn(xkb_t *xkb, struct as *as) 527 { 528 uintptr_t asp = (uintptr_t)as; 529 uintptr_t hatp; 530 uintptr_t htablep; 531 uintptr_t pfn; 532 533 if (!xkb_read_word(xkb, asp + offsetof(struct as, a_hat), &hatp)) 534 return (MFN_INVALID); 535 if (!xkb_read_word(xkb, hatp + xkb->xkb_info.di_hat_htable_off, 536 &htablep)) 537 return (MFN_INVALID); 538 if (!xkb_read_word(xkb, htablep + xkb->xkb_info.di_ht_pfn_off, 539 &pfn)) 540 return (MFN_INVALID); 541 542 if (pfn > xkb->xkb_max_pfn) 543 return (MFN_INVALID); 544 545 return (xkb->xkb_p2m[pfn]); 546 } 547 548 static mfn_t 549 xkb_cr3_to_pfn(xkb_t *xkb) 550 { 551 uint64_t cr3 = xkb->xkb_vcpus[0]->ctrlreg[3]; 552 if (xkb->xkb_is_hvm) 553 return (cr3 >> PAGE_SHIFT); 554 return (xen_cr3_to_pfn(cr3)); 555 } 556 557 static ssize_t 558 xkb_read_helper(xkb_t *xkb, struct as *as, int phys, uint64_t addr, 559 void *buf, size_t size) 560 { 561 size_t left = size; 562 int windowed = (xkb->xkb_pages == NULL); 563 mfn_t tlmfn = xkb_cr3_to_pfn(xkb); 564 565 if (as != NULL && (tlmfn = xkb_as_to_mfn(xkb, as)) == MFN_INVALID) 566 return (-1); 567 568 while (left) { 569 uint64_t pos = addr + (size - left); 570 char *outpos = (char *)buf + (size - left); 571 size_t pageoff = PAGE_OFFSET(pos); 572 size_t sz = MIN(left, PAGE_SIZE - pageoff); 573 mfn_t mfn; 574 575 if (!phys) { 576 mfn = xkb_va_to_mfn(xkb, pos, tlmfn); 577 if (mfn == MFN_INVALID) 578 return (-1); 579 } else { 580 xen_pfn_t pfn = pos >> PAGE_SHIFT; 581 if (pfn > xkb->xkb_max_pfn) 582 return (-1); 583 mfn = xkb->xkb_p2m[pfn]; 584 if (mfn == MFN_INVALID) 585 return (-1); 586 } 587 588 /* 589 * If we're windowed then pread() is much faster. 
590 */ 591 if (windowed) { 592 offset_t off = xkb_mfn_to_offset(xkb, mfn); 593 int ret; 594 595 if (off == ~1ULL) 596 return (-1); 597 598 off += pageoff; 599 600 ret = pread64(xkb->xkb_fd, outpos, sz, off); 601 if (ret == -1) 602 return (-1); 603 if (ret != sz) 604 return ((size - left) + ret); 605 606 left -= ret; 607 } else { 608 if (xkb_map_mfn(xkb, mfn, &xkb->xkb_map) == NULL) 609 return (-1); 610 611 bcopy(xkb->xkb_map.mm_map + pageoff, outpos, sz); 612 613 left -= sz; 614 } 615 } 616 617 return (size); 618 } 619 620 static ssize_t 621 xkb_pread(xkb_t *xkb, uint64_t addr, void *buf, size_t size) 622 { 623 return (xkb_read_helper(xkb, NULL, 1, addr, buf, size)); 624 } 625 626 static ssize_t 627 xkb_aread(xkb_t *xkb, uintptr_t addr, void *buf, size_t size, struct as *as) 628 { 629 return (xkb_read_helper(xkb, as, 0, addr, buf, size)); 630 } 631 632 static ssize_t 633 xkb_read(xkb_t *xkb, uintptr_t addr, void *buf, size_t size) 634 { 635 return (xkb_aread(xkb, addr, buf, size, NULL)); 636 } 637 638 static int 639 xkb_read_word(xkb_t *xkb, uintptr_t addr, uintptr_t *buf) 640 { 641 if (xkb_read(xkb, addr, buf, sizeof (uintptr_t)) != 642 sizeof (uintptr_t)) 643 return (0); 644 return (1); 645 } 646 647 static char * 648 xkb_readstr(xkb_t *xkb, uintptr_t addr) 649 { 650 char *str = mdb_alloc(1024, UM_SLEEP); 651 size_t i; 652 653 for (i = 0; i < 1024; i++) { 654 if (xkb_read(xkb, addr + i, &str[i], 1) != 1) { 655 mdb_free(str, 1024); 656 return (NULL); 657 } 658 659 if (str[i] == '\0') 660 break; 661 } 662 663 if (i == 1024) { 664 mdb_free(str, 1024); 665 return (NULL); 666 } 667 668 return (str); 669 } 670 671 static offset_t 672 xkb_pfn_to_off(xkb_t *xkb, xen_pfn_t pfn) 673 { 674 if (pfn == PFN_INVALID || pfn > xkb->xkb_max_pfn) 675 return (-1ULL); 676 677 if (xkb->xkb_type == XKB_FORMAT_CORE) 678 return (PAGE_SIZE * pfn); 679 680 return (PAGE_SIZE * (xkb->xkb_elf.xe_off[pfn])); 681 } 682 683 static offset_t 684 xkb_mfn_to_offset(xkb_t *xkb, mfn_t mfn) 685 { 686 
xen_pfn_t pfn; 687 688 if (mfn > xkb->xkb_max_mfn) 689 return (-1ULL); 690 691 pfn = xkb->xkb_m2p[mfn]; 692 693 if (pfn == PFN_INVALID) 694 return (-1ULL); 695 696 return (xkb->xkb_pages_off + xkb_pfn_to_off(xkb, pfn)); 697 } 698 699 static char * 700 xkb_map_mfn(xkb_t *xkb, mfn_t mfn, mfn_map_t *mm) 701 { 702 int windowed = (xkb->xkb_pages == NULL); 703 offset_t off; 704 705 if (mm->mm_mfn == mfn) 706 return (mm->mm_map); 707 708 mm->mm_mfn = mfn; 709 710 if (windowed) { 711 if (mm->mm_map != (char *)MAP_FAILED) { 712 (void) munmap(mm->mm_map, PAGE_SIZE); 713 mm->mm_map = (void *)MAP_FAILED; 714 } 715 716 if ((off = xkb_mfn_to_offset(xkb, mfn)) == (-1ULL)) 717 return (NULL); 718 719 mm->mm_map = mmap(NULL, PAGE_SIZE, PROT_READ, MAP_SHARED, 720 xkb->xkb_fd, off); 721 722 if (mm->mm_map == (char *)MAP_FAILED) 723 return (NULL); 724 } else { 725 xen_pfn_t pfn; 726 727 mm->mm_map = NULL; 728 729 if (mfn > xkb->xkb_max_mfn) 730 return (NULL); 731 732 pfn = xkb->xkb_m2p[mfn]; 733 734 if (pfn == PFN_INVALID) 735 return (NULL); 736 737 mm->mm_map = xkb->xkb_pages + xkb_pfn_to_off(xkb, pfn); 738 } 739 740 return (mm->mm_map); 741 } 742 743 static uint64_t 744 xkb_get_pte(mmu_info_t *mmu, char *ptep) 745 { 746 uint64_t pte = 0; 747 748 if (mmu->mi_ptesize == 8) { 749 /* LINTED - alignment */ 750 pte = *((uint64_t *)ptep); 751 } else { 752 /* LINTED - alignment */ 753 pte = *((uint32_t *)ptep); 754 } 755 756 return (pte); 757 } 758 759 static mfn_t 760 xkb_pte_to_base_mfn(uint64_t pte, size_t level) 761 { 762 if (PTE_IS_LGPG(pte, level)) { 763 pte &= PT_PADDR_LGPG; 764 } else { 765 pte &= PT_PADDR; 766 } 767 768 return (pte >> PAGE_SHIFT); 769 } 770 771 /* 772 * Resolve the given VA into an MFN, using the provided mfn as a top-level page 773 * table. 
774 */ 775 static mfn_t 776 xkb_va_to_mfn(xkb_t *xkb, uintptr_t va, mfn_t mfn) 777 { 778 mmu_info_t *mmu = &xkb->xkb_mmu; 779 uint64_t pte; 780 size_t level; 781 782 for (level = mmu->mi_max; ; --level) { 783 size_t entry; 784 785 if (xkb_map_mfn(xkb, mfn, &xkb->xkb_pt_map[level]) == NULL) 786 return (MFN_INVALID); 787 788 entry = (va >> mmu->mi_shift[level]) & (mmu->mi_ptes - 1); 789 790 pte = xkb_get_pte(mmu, (char *)xkb->xkb_pt_map[level].mm_map + 791 entry * mmu->mi_ptesize); 792 793 if ((mfn = xkb_pte_to_base_mfn(pte, level)) == MFN_INVALID) 794 return (MFN_INVALID); 795 796 if (level == 0) 797 break; 798 799 /* 800 * Currently 'mfn' refers to the base MFN of the 801 * large-page mapping. Add on the 4K-sized index into 802 * the large-page mapping to get the right MFN within 803 * the mapping. 804 */ 805 if (PTE_IS_LGPG(pte, level)) { 806 mfn += (va & ((1 << mmu->mi_shift[level]) - 1)) >> 807 PAGE_SHIFT; 808 break; 809 } 810 } 811 812 return (mfn); 813 } 814 815 static int 816 xkb_read_module(xkb_t *xkb, uintptr_t modulep, struct module *module, 817 uintptr_t *sym_addr, uintptr_t *sym_count, uintptr_t *str_addr) 818 { 819 if (xkb_read(xkb, modulep, module, sizeof (struct module)) != 820 sizeof (struct module)) 821 return (0); 822 823 if (!xkb_read_word(xkb, (uintptr_t)module->symhdr + 824 offsetof(Shdr, sh_addr), sym_addr)) 825 return (0); 826 827 if (!xkb_read_word(xkb, (uintptr_t)module->strhdr + 828 offsetof(Shdr, sh_addr), str_addr)) 829 return (0); 830 831 if (!xkb_read_word(xkb, (uintptr_t)module->symhdr + 832 offsetof(Shdr, sh_size), sym_count)) 833 return (0); 834 *sym_count /= sizeof (Sym); 835 836 return (1); 837 } 838 839 static int 840 xkb_read_modsyms(xkb_t *xkb, char **buf, size_t *sizes, int types, 841 uintptr_t sym_addr, uintptr_t str_addr, uintptr_t sym_count) 842 { 843 size_t i; 844 845 for (i = 0; i < sym_count; i++) { 846 Sym sym; 847 char *name; 848 size_t sz; 849 int type = XKB_WALK_GLOBAL; 850 851 if (xkb_read(xkb, sym_addr + i * sizeof 
(sym), &sym, 852 sizeof (sym)) != sizeof (sym)) 853 return (0); 854 855 if (GELF_ST_BIND(sym.st_info) == STB_LOCAL) 856 type = XKB_WALK_LOCAL; 857 858 name = xkb_readstr(xkb, str_addr + sym.st_name); 859 860 sym.st_shndx = SHN_ABS; 861 sym.st_name = sizes[XKB_WALK_STR]; 862 863 sizes[type] += sizeof (sym); 864 sz = strlen(name) + 1; 865 sizes[XKB_WALK_STR] += sz; 866 867 if (buf != NULL) { 868 if (types & type) { 869 bcopy(&sym, *buf, sizeof (sym)); 870 *buf += sizeof (sym); 871 } 872 if (types & XKB_WALK_STR) { 873 bcopy(name, *buf, sz); 874 *buf += sz; 875 } 876 } 877 878 mdb_free(name, 1024); 879 } 880 881 return (1); 882 } 883 884 static int 885 xkb_walk_syms(xkb_t *xkb, uintptr_t modhead, char **buf, 886 size_t *sizes, int types) 887 { 888 uintptr_t modctl = modhead; 889 uintptr_t modulep; 890 struct module module; 891 uintptr_t sym_count; 892 uintptr_t sym_addr; 893 uintptr_t str_addr; 894 size_t max_iter = 500; 895 896 bzero(sizes, sizeof (*sizes) * (XKB_WALK_STR + 1)); 897 898 /* 899 * empty first symbol 900 */ 901 sizes[XKB_WALK_LOCAL] += sizeof (Sym); 902 sizes[XKB_WALK_STR] += 1; 903 904 if (buf != NULL) { 905 if (types & XKB_WALK_LOCAL) { 906 Sym tmp; 907 bzero(&tmp, sizeof (tmp)); 908 bcopy(&tmp, *buf, sizeof (tmp)); 909 *buf += sizeof (tmp); 910 } 911 if (types & XKB_WALK_STR) { 912 **buf = '\0'; 913 (*buf)++; 914 } 915 } 916 917 for (;;) { 918 if (!xkb_read_word(xkb, 919 modctl + offsetof(struct modctl, mod_mp), &modulep)) 920 return (0); 921 922 if (modulep == NULL) 923 goto next; 924 925 if (!xkb_read_module(xkb, modulep, &module, &sym_addr, 926 &sym_count, &str_addr)) 927 return (0); 928 929 if ((module.flags & KOBJ_NOKSYMS)) 930 goto next; 931 932 if (!xkb_read_modsyms(xkb, buf, sizes, types, sym_addr, 933 str_addr, sym_count)) 934 return (0); 935 936 next: 937 if (!xkb_read_word(xkb, 938 modctl + offsetof(struct modctl, mod_next), &modctl)) 939 return (0); 940 941 if (modctl == modhead) 942 break; 943 /* 944 * Try and prevent us looping forever 
if we have a broken list. 945 */ 946 if (--max_iter == 0) 947 break; 948 } 949 950 return (1); 951 } 952 953 /* 954 * Userspace equivalent of ksyms_snapshot(). Since we don't have a namelist 955 * file for hypervisor images, we fabricate one here using code similar 956 * to that of /dev/ksyms. 957 */ 958 static int 959 xkb_build_ksyms(xkb_t *xkb) 960 { 961 debug_info_t *info = &xkb->xkb_info; 962 size_t sizes[XKB_WALK_STR + 1]; 963 xkb_namelist_t *hdr; 964 char *buf; 965 struct modctl modules; 966 uintptr_t module; 967 Shdr *shp; 968 969 if (xkb_read(xkb, info->di_modules, &modules, 970 sizeof (struct modctl)) != sizeof (struct modctl)) 971 return (0); 972 973 module = (uintptr_t)modules.mod_mp; 974 975 if (!xkb_walk_syms(xkb, info->di_modules, NULL, sizes, 976 XKB_WALK_LOCAL | XKB_WALK_GLOBAL | XKB_WALK_STR)) 977 return (0); 978 979 xkb->xkb_namesize = sizeof (xkb_namelist_t); 980 xkb->xkb_namesize += sizes[XKB_WALK_LOCAL]; 981 xkb->xkb_namesize += sizes[XKB_WALK_GLOBAL]; 982 xkb->xkb_namesize += sizes[XKB_WALK_STR]; 983 984 if ((xkb->xkb_namelist = mdb_zalloc(xkb->xkb_namesize, UM_SLEEP)) 985 == NULL) 986 return (0); 987 988 /* LINTED - alignment */ 989 hdr = (xkb_namelist_t *)xkb->xkb_namelist; 990 991 if (xkb_read(xkb, module + offsetof(struct module, hdr), 992 &hdr->kh_elf_hdr, sizeof (Ehdr)) != sizeof (Ehdr)) 993 return (0); 994 995 hdr->kh_elf_hdr.e_phoff = offsetof(xkb_namelist_t, kh_text_phdr); 996 hdr->kh_elf_hdr.e_shoff = offsetof(xkb_namelist_t, kh_shdr); 997 hdr->kh_elf_hdr.e_phnum = 2; 998 hdr->kh_elf_hdr.e_shnum = XKB_SHDR_NUM; 999 hdr->kh_elf_hdr.e_shstrndx = XKB_SHDR_SHSTRTAB; 1000 1001 hdr->kh_text_phdr.p_type = PT_LOAD; 1002 hdr->kh_text_phdr.p_vaddr = (Addr)info->di_s_text; 1003 hdr->kh_text_phdr.p_memsz = (Word)(info->di_e_text - info->di_s_text); 1004 hdr->kh_text_phdr.p_flags = PF_R | PF_X; 1005 1006 hdr->kh_data_phdr.p_type = PT_LOAD; 1007 hdr->kh_data_phdr.p_vaddr = (Addr)info->di_s_data; 1008 hdr->kh_data_phdr.p_memsz = 
(Word)(info->di_e_data - info->di_s_data); 1009 hdr->kh_data_phdr.p_flags = PF_R | PF_W | PF_X; 1010 1011 shp = &hdr->kh_shdr[XKB_SHDR_SYMTAB]; 1012 shp->sh_name = 1; /* xkb_shstrtab[1] = ".symtab" */ 1013 shp->sh_type = SHT_SYMTAB; 1014 shp->sh_offset = sizeof (xkb_namelist_t); 1015 shp->sh_size = sizes[XKB_WALK_LOCAL] + sizes[XKB_WALK_GLOBAL]; 1016 shp->sh_link = XKB_SHDR_STRTAB; 1017 shp->sh_info = sizes[XKB_WALK_LOCAL] / sizeof (Sym); 1018 shp->sh_addralign = sizeof (Addr); 1019 shp->sh_entsize = sizeof (Sym); 1020 shp->sh_addr = (Addr)(xkb->xkb_namelist + shp->sh_offset); 1021 1022 1023 shp = &hdr->kh_shdr[XKB_SHDR_STRTAB]; 1024 shp->sh_name = 9; /* xkb_shstrtab[9] = ".strtab" */ 1025 shp->sh_type = SHT_STRTAB; 1026 shp->sh_offset = sizeof (xkb_namelist_t) + 1027 sizes[XKB_WALK_LOCAL] + sizes[XKB_WALK_GLOBAL]; 1028 shp->sh_size = sizes[XKB_WALK_STR]; 1029 shp->sh_addralign = 1; 1030 shp->sh_addr = (Addr)(xkb->xkb_namelist + shp->sh_offset); 1031 1032 1033 shp = &hdr->kh_shdr[XKB_SHDR_SHSTRTAB]; 1034 shp->sh_name = 17; /* xkb_shstrtab[17] = ".shstrtab" */ 1035 shp->sh_type = SHT_STRTAB; 1036 shp->sh_offset = offsetof(xkb_namelist_t, shstrings); 1037 shp->sh_size = sizeof (xkb_shstrtab); 1038 shp->sh_addralign = 1; 1039 shp->sh_addr = (Addr)(xkb->xkb_namelist + shp->sh_offset); 1040 1041 bcopy(xkb_shstrtab, hdr->shstrings, sizeof (xkb_shstrtab)); 1042 1043 buf = xkb->xkb_namelist + sizeof (xkb_namelist_t); 1044 1045 if (!xkb_walk_syms(xkb, info->di_modules, &buf, sizes, 1046 XKB_WALK_LOCAL)) 1047 return (0); 1048 if (!xkb_walk_syms(xkb, info->di_modules, &buf, sizes, 1049 XKB_WALK_GLOBAL)) 1050 return (0); 1051 if (!xkb_walk_syms(xkb, info->di_modules, &buf, sizes, 1052 XKB_WALK_STR)) 1053 return (0); 1054 1055 return (1); 1056 } 1057 1058 static xkb_t * 1059 xkb_open_core(xkb_t *xkb) 1060 { 1061 xkb_core_t *xc = &xkb->xkb_core; 1062 size_t sz; 1063 int i; 1064 struct vcpu_guest_context *vcp; 1065 1066 xkb->xkb_type = XKB_FORMAT_CORE; 1067 1068 if ((xkb->xkb_fd 
= open64(xkb->xkb_path, O_RDONLY)) == -1) 1069 return (xkb_fail(xkb, "cannot open %s", xkb->xkb_path)); 1070 1071 if (pread64(xkb->xkb_fd, &xc->xc_hdr, sizeof (xc->xc_hdr), 0) != 1072 sizeof (xc->xc_hdr)) 1073 return (xkb_fail(xkb, "invalid dump file")); 1074 1075 if (xc->xc_hdr.xch_magic == XC_CORE_MAGIC_HVM) 1076 return (xkb_fail(xkb, "cannot process HVM images")); 1077 1078 if (xc->xc_hdr.xch_magic != XC_CORE_MAGIC) { 1079 return (xkb_fail(xkb, "invalid magic %d", 1080 xc->xc_hdr.xch_magic)); 1081 } 1082 1083 /* 1084 * With FORMAT_CORE, all pages are in the dump (non-existing 1085 * ones are zeroed out). 1086 */ 1087 xkb->xkb_nr_pages = xc->xc_hdr.xch_nr_pages; 1088 xkb->xkb_pages_off = xc->xc_hdr.xch_pages_offset; 1089 xkb->xkb_max_pfn = xc->xc_hdr.xch_nr_pages - 1; 1090 xkb->xkb_nr_vcpus = xc->xc_hdr.xch_nr_vcpus; 1091 1092 sz = xkb->xkb_nr_vcpus * sizeof (struct vcpu_guest_context); 1093 xkb->xkb_vcpu_data_sz = sz; 1094 xkb->xkb_vcpu_data = mdb_alloc(sz, UM_SLEEP); 1095 1096 if (pread64(xkb->xkb_fd, xkb->xkb_vcpu_data, sz, 1097 xc->xc_hdr.xch_ctxt_offset) != sz) 1098 return (xkb_fail(xkb, "cannot read VCPU contexts")); 1099 1100 sz = xkb->xkb_nr_vcpus * sizeof (struct vcpu_guest_context *); 1101 xkb->xkb_vcpus = mdb_alloc(sz, UM_SLEEP); 1102 1103 vcp = xkb->xkb_vcpu_data; 1104 for (i = 0; i < xkb->xkb_nr_vcpus; i++) 1105 xkb->xkb_vcpus[i] = &vcp[i]; 1106 1107 /* 1108 * Try to map all the data pages. If we can't, fall back to the 1109 * window/pread() approach, which is significantly slower. 1110 */ 1111 xkb->xkb_pages = mmap(NULL, PAGE_SIZE * xkb->xkb_nr_pages, 1112 PROT_READ, MAP_SHARED, xkb->xkb_fd, xc->xc_hdr.xch_pages_offset); 1113 1114 if (xkb->xkb_pages == (char *)MAP_FAILED) 1115 xkb->xkb_pages = NULL; 1116 1117 /* 1118 * We'd like to adapt for correctness' sake, but we have no way of 1119 * detecting a PAE guest, since cr4 writes are disallowed. 
 */
	/*
	 * Tail of the preceding (core-format) open path: its definition
	 * begins above this chunk. Non-ELF core dumps are always PAE.
	 */
	xkb->xkb_is_pae = 1;

	if (!xkb_map_p2m(xkb))
		return (NULL);

	return (xkb);
}

/*
 * Probe and open xkb->xkb_path as an ELF-format hypervisor dump.
 *
 * Returns xkb on success, and also returns xkb (with xkb_type left as
 * XKB_FORMAT_UNKNOWN) when the file simply isn't an ELF xen dump, so the
 * caller can fall back to the old 'core' format.  On a corrupt or invalid
 * ELF dump, fails via xkb_fail().
 *
 * NOTE(review): xkb_fail() is defined elsewhere in this file; it appears
 * to tear down xkb and return NULL -- confirm before relying on it here.
 */
static xkb_t *
xkb_open_elf(xkb_t *xkb)
{
	xkb_elf_t *xe = &xkb->xkb_elf;
	mdb_gelf_sect_t *sect;
	char *notes;
	char *pos;
	mdb_io_t *io;
	size_t sz;
	int i;
	void *dp;

	if ((io = mdb_fdio_create_path(NULL, xkb->xkb_path,
	    O_RDONLY, 0)) == NULL)
		return (xkb_fail(xkb, "failed to open"));

	xe->xe_gelf = mdb_gelf_create(io, ET_NONE, GF_FILE);

	/* Not an ELF file at all: let the caller try the core format. */
	if (xe->xe_gelf == NULL) {
		mdb_io_destroy(io);
		return (xkb);
	}

	xkb->xkb_fd = mdb_fdio_fileno(io);

	sect = mdb_gelf_sect_by_name(xe->xe_gelf, ".note.Xen");

	/* ELF, but no Xen note section: also not ours. */
	if (sect == NULL)
		return (xkb);

	if ((notes = mdb_gelf_sect_load(xe->xe_gelf, sect)) == NULL)
		return (xkb);

	/*
	 * Now we know this is indeed a hypervisor core dump, even if
	 * it's corrupted.
	 */
	xkb->xkb_type = XKB_FORMAT_ELF;

	/*
	 * Walk the ELF notes in .note.Xen.  Each entry is an Elf64_Nhdr
	 * followed by the name (padded to 4 bytes) and the descriptor.
	 *
	 * NOTE(review): the advance "pos = desc + nhdr->n_descsz" does not
	 * round n_descsz up to 4-byte alignment, unlike the name; if a
	 * note's descriptor size is ever not a multiple of 4, subsequent
	 * headers would be read misaligned -- confirm against the sizes
	 * Xen's dump writer actually emits.
	 */
	for (pos = notes; pos < notes + sect->gs_shdr.sh_size; ) {
		/* LINTED - alignment */
		Elf64_Nhdr *nhdr = (Elf64_Nhdr *)pos;
		uint64_t vers;
		char *desc;
		char *name;

		name = pos + sizeof (*nhdr);
		desc = (char *)P2ROUNDUP((uintptr_t)name + nhdr->n_namesz, 4);

		pos = desc + nhdr->n_descsz;

		switch (nhdr->n_type) {
		case XEN_ELFNOTE_DUMPCORE_NONE:
			break;

		case XEN_ELFNOTE_DUMPCORE_HEADER:
			if (nhdr->n_descsz != sizeof (struct xc_elf_header)) {
				return (xkb_fail(xkb, "invalid ELF note "
				    "XEN_ELFNOTE_DUMPCORE_HEADER\n"));
			}

			bcopy(desc, &xe->xe_hdr,
			    sizeof (struct xc_elf_header));
			break;

		case XEN_ELFNOTE_DUMPCORE_XEN_VERSION:
			if (nhdr->n_descsz < sizeof (struct xc_elf_version)) {
				return (xkb_fail(xkb, "invalid ELF note "
				    "XEN_ELFNOTE_DUMPCORE_XEN_VERSION\n"));
			}

			bcopy(desc, &xe->xe_version,
			    sizeof (struct xc_elf_version));
			break;

		case XEN_ELFNOTE_DUMPCORE_FORMAT_VERSION:
			/*
			 * Format version is a 64-bit value: major in the
			 * upper 32 bits (must be 0), minor in the lower
			 * (warn, but proceed, if it isn't 1).
			 */
			/* LINTED - alignment */
			vers = *((uint64_t *)desc);
			if ((vers >> 32) != 0) {
				return (xkb_fail(xkb, "unknown major "
				    "version %d (expected 0)\n",
				    (int)(vers >> 32)));
			}

			if ((vers & 0xffffffff) != 1) {
				mdb_warn("unexpected dump minor number "
				    "version %d (expected 1)\n",
				    (int)(vers & 0xffffffff));
			}
			break;

		default:
			mdb_warn("unknown ELF note %d(%s)\n",
			    nhdr->n_type, name);
			break;
		}
	}

	xkb->xkb_is_hvm = xe->xe_hdr.xeh_magic == XC_CORE_MAGIC_HVM;

	if (xe->xe_hdr.xeh_magic != XC_CORE_MAGIC &&
	    xe->xe_hdr.xeh_magic != XC_CORE_MAGIC_HVM) {
		return (xkb_fail(xkb, "invalid magic %d",
		    xe->xe_hdr.xeh_magic));
	}

	xkb->xkb_nr_pages = xe->xe_hdr.xeh_nr_pages;
	/* "x86_32p" in the capabilities string marks a 32-bit PAE guest. */
	xkb->xkb_is_pae = (strstr(xe->xe_version.xev_capabilities,
	    "x86_32p") != NULL);

	sect = mdb_gelf_sect_by_name(xe->xe_gelf, ".xen_prstatus");

	if (sect == NULL)
		return (xkb_fail(xkb, "cannot find section .xen_prstatus"));

	if (sect->gs_shdr.sh_entsize < sizeof (vcpu_guest_context_t))
		return (xkb_fail(xkb, "invalid section .xen_prstatus"));

	xkb->xkb_nr_vcpus = sect->gs_shdr.sh_size / sect->gs_shdr.sh_entsize;

	/* Data is owned by the gelf handle; freed via mdb_gelf_destroy(). */
	xkb->xkb_vcpu_data = mdb_gelf_sect_load(xe->xe_gelf, sect);
	if (xkb->xkb_vcpu_data == NULL)
		return (xkb_fail(xkb, "cannot load section .xen_prstatus"));
	xkb->xkb_vcpu_data_sz = sect->gs_shdr.sh_size;

	/*
	 * The vcpu_guest_context structures saved in the core file
	 * are actually unions of the 64-bit and 32-bit versions.
	 * Don't rely on the entry size to match the size of
	 * the structure, but set up an array of pointers.
	 */
	sz = xkb->xkb_nr_vcpus * sizeof (struct vcpu_guest_context *);
	xkb->xkb_vcpus = mdb_alloc(sz, UM_SLEEP);
	for (i = 0; i < xkb->xkb_nr_vcpus; i++) {
		dp = ((char *)xkb->xkb_vcpu_data +
		    i * sect->gs_shdr.sh_entsize);
		xkb->xkb_vcpus[i] = dp;
	}

	sect = mdb_gelf_sect_by_name(xe->xe_gelf, ".xen_pages");

	if (sect == NULL)
		return (xkb_fail(xkb, "cannot find section .xen_pages"));

	if (!PAGE_ALIGNED(sect->gs_shdr.sh_offset))
		return (xkb_fail(xkb, ".xen_pages is not page aligned"));

	if (sect->gs_shdr.sh_entsize != PAGE_SIZE)
		return (xkb_fail(xkb, "invalid section .xen_pages"));

	xkb->xkb_pages_off = sect->gs_shdr.sh_offset;

	/*
	 * Try to map all the data pages. If we can't, fall back to the
	 * window/pread() approach, which is significantly slower.
	 */
	xkb->xkb_pages = mmap(NULL, PAGE_SIZE * xkb->xkb_nr_pages,
	    PROT_READ, MAP_SHARED, xkb->xkb_fd, xkb->xkb_pages_off);

	if (xkb->xkb_pages == (char *)MAP_FAILED)
		xkb->xkb_pages = NULL;

	/* HVM dumps have no real p2m table, so synthesize one. */
	if (xkb->xkb_is_hvm) {
		if (!xkb_build_fake_p2m(xkb))
			return (NULL);
	} else {
		if (!xkb_build_p2m(xkb))
			return (NULL);
	}

	return (xkb);
}

/*
 * Initialize the page-table geometry used for VA->MFN translation:
 * number of levels (mi_max is the top level index), the VA shift at
 * each level, and the count/size of PTEs per table.  amd64 always uses
 * 4-level 64-bit PTEs; i386 depends on whether the guest is PAE.
 */
static void
xkb_init_mmu(xkb_t *xkb)
{
#if defined(__amd64)
	xkb->xkb_mmu.mi_max = 3;
	xkb->xkb_mmu.mi_shift[0] = 12;
	xkb->xkb_mmu.mi_shift[1] = 21;
	xkb->xkb_mmu.mi_shift[2] = 30;
	xkb->xkb_mmu.mi_shift[3] = 39;
	xkb->xkb_mmu.mi_ptes = 512;
	xkb->xkb_mmu.mi_ptesize = 8;
#elif defined(__i386)
	if (xkb->xkb_is_pae) {
		xkb->xkb_mmu.mi_max = 2;
		xkb->xkb_mmu.mi_shift[0] = 12;
		xkb->xkb_mmu.mi_shift[1] = 21;
		xkb->xkb_mmu.mi_shift[2] = 30;
		xkb->xkb_mmu.mi_ptes = 512;
		xkb->xkb_mmu.mi_ptesize = 8;
	} else {
		xkb->xkb_mmu.mi_max = 1;
		xkb->xkb_mmu.mi_shift[0] = 12;
		xkb->xkb_mmu.mi_shift[1] = 22;
		xkb->xkb_mmu.mi_ptes = 1024;
		xkb->xkb_mmu.mi_ptesize = 4;
	}
#endif
}

/*
 * Open a hypervisor dump: bootstrap from the corefile alone (namelist
 * and swapfile are unused -- the namelist is constructed from the dump
 * itself).  Tries the ELF format first, then falls back to the old
 * core format.  Returns the backend handle, or NULL on failure (error
 * reporting and cleanup via xkb_fail()).
 */
/*ARGSUSED*/
xkb_t *
xkb_open(const char *namelist, const char *corefile, const char *swapfile,
    int flag, const char *err)
{
	uintptr_t debug_info = DEBUG_INFO;
	struct stat64 corestat;
	xkb_t *xkb = NULL;
	size_t i;

	if (stat64(corefile, &corestat) == -1)
		return (xkb_fail(xkb, "cannot stat %s", corefile));

	/* Dumps are read-only. */
	if (flag != O_RDONLY)
		return (xkb_fail(xkb, "invalid open flags"));

	xkb = mdb_zalloc(sizeof (*xkb), UM_SLEEP);

	/* Mark every mapping slot invalid so xkb_close() can tell. */
	for (i = 0; i < 4; i++) {
		xkb->xkb_pt_map[i].mm_mfn = MFN_INVALID;
		xkb->xkb_pt_map[i].mm_map = (char *)MAP_FAILED;
	}

	xkb->xkb_type = XKB_FORMAT_UNKNOWN;
	xkb->xkb_map.mm_mfn = MFN_INVALID;
	xkb->xkb_map.mm_map = (char *)MAP_FAILED;
	xkb->xkb_core.xc_p2m_buf = (char *)MAP_FAILED;
	xkb->xkb_fd = -1;

	/* NOTE(review): strdup() result is not checked for NULL. */
	xkb->xkb_path = strdup(corefile);

	if ((xkb = xkb_open_elf(xkb)) == NULL)
		return (NULL);

	/* Not ELF: must be the old ad hoc core format. */
	if (xkb->xkb_type == XKB_FORMAT_UNKNOWN) {
		if (!xkb_open_core(xkb))
			return (NULL);
	}

	xkb_init_mmu(xkb);

	if (!xkb_build_m2p(xkb))
		return (NULL);

	if (xkb->xkb_is_hvm)
		debug_info = DEBUG_INFO_HVM;

	/*
	 * Bootstrap from the debug_info structure at its well-known VA
	 * (see the comment at the top of this file).
	 */
	if (xkb_read(xkb, debug_info, &xkb->xkb_info,
	    sizeof (xkb->xkb_info)) != sizeof (xkb->xkb_info))
		return (xkb_fail(xkb, "cannot read debug_info"));

	if (xkb->xkb_info.di_magic != DEBUG_INFO_MAGIC) {
		return (xkb_fail(xkb, "invalid debug info magic %d",
		    xkb->xkb_info.di_magic));
	}

	if (xkb->xkb_info.di_version != DEBUG_INFO_VERSION) {
		return (xkb_fail(xkb, "unknown debug info version %d",
		    xkb->xkb_info.di_version));
	}

	if (!xkb_build_ksyms(xkb))
		return (xkb_fail(xkb, "cannot construct namelist"));

	return (xkb);
}

/*
 * Tear down an xkb handle, releasing everything xkb_open() and its
 * helpers may have acquired.  Safe on a partially-constructed handle
 * (every resource is checked against its invalid-marker value first).
 * Always returns 0.
 */
int
xkb_close(xkb_t *xkb)
{
	size_t i, sz;

	if (xkb == NULL)
		return (0);

	if (xkb->xkb_m2p != NULL) {
		mdb_free(xkb->xkb_m2p,
		    (xkb->xkb_max_mfn + 1) * sizeof (xen_pfn_t));
	}

	/*
	 * Either the whole page area was mmap()ed at once, or we were
	 * using the per-page window mappings -- never both.
	 */
	if (xkb->xkb_pages != NULL) {
		(void) munmap((void *)xkb->xkb_pages,
		    PAGE_SIZE * xkb->xkb_nr_pages);
	} else {
		for (i = 0; i < 4; i++) {
			char *addr = xkb->xkb_pt_map[i].mm_map;
			if (addr != (char *)MAP_FAILED)
				(void) munmap((void *)addr, PAGE_SIZE);
		}
		if (xkb->xkb_map.mm_map != (char *)MAP_FAILED) {
			(void) munmap((void *)xkb->xkb_map.mm_map,
			    PAGE_SIZE);
		}
	}

	if (xkb->xkb_namelist != NULL)
		mdb_free(xkb->xkb_namelist, xkb->xkb_namesize);

	if (xkb->xkb_type == XKB_FORMAT_ELF) {
		xkb_elf_t *xe = &xkb->xkb_elf;

		/*
		 * Destroying the gelf handle also releases the loaded
		 * section data (including xkb_vcpu_data) and the fd's io.
		 */
		if (xe->xe_gelf != NULL)
			mdb_gelf_destroy(xe->xe_gelf);

		sz = sizeof (xen_pfn_t) * (xkb->xkb_max_pfn + 1);

		if (xkb->xkb_p2m != NULL)
			mdb_free(xkb->xkb_p2m, sz);

		sz = sizeof (size_t) * (xkb->xkb_max_pfn + 1);

		if (xe->xe_off != NULL)
			mdb_free(xe->xe_off, sz);

	} else if (xkb->xkb_type == XKB_FORMAT_CORE) {
		xkb_core_t *xc = &xkb->xkb_core;

		if (xkb->xkb_fd != -1)
			(void) close(xkb->xkb_fd);

		/*
		 * Size must mirror the p2m buffer mapping made by the
		 * core-format open path.
		 */
		sz = (xkb->xkb_nr_pages * sizeof (mfn_t)) + (PAGE_SIZE * 2);
		sz = PAGE_MASK(sz);

		if (xc->xc_p2m_buf != (xen_pfn_t *)MAP_FAILED)
			(void) munmap(xc->xc_p2m_buf, sz);

		/* Core format allocates its own vcpu data copy. */
		if (xkb->xkb_vcpu_data != NULL)
			mdb_free(xkb->xkb_vcpu_data, xkb->xkb_vcpu_data_sz);
	}

	if (xkb->xkb_vcpus != NULL) {
		sz = sizeof (struct vcpu_guest_context *) *
		    xkb->xkb_nr_vcpus;
		mdb_free(xkb->xkb_vcpus, sz);
	}

	/* strdup()ed with libc, so freed with libc. */
	free(xkb->xkb_path);

	mdb_free(xkb, sizeof (*xkb));
	return (0);
}

/*
 * Hand back an in-memory io over the namelist we constructed at open
 * time; symfile is ignored.  Returns NULL (with a warning) on failure.
 */
/*ARGSUSED*/
static mdb_io_t *
xkb_sym_io(xkb_t *xkb, const char *symfile)
{
	mdb_io_t *io = mdb_memio_create(xkb->xkb_namelist, xkb->xkb_namesize);

	if (io == NULL)
		mdb_warn("failed to create namelist from %s", xkb->xkb_path);

	return (io);
}

/*
 * Translate a guest VA to a dump 'physical' address: walk the guest's
 * page tables to an MFN, then convert back through the m2p table to a
 * PFN-based offset.  Uses the current cr3 top-level table unless an
 * address space is supplied.  Returns -1ULL if the VA doesn't resolve.
 */
uint64_t
xkb_vtop(xkb_t *xkb, struct as *as, uintptr_t addr)
{
	mfn_t tlmfn = xkb_cr3_to_pfn(xkb);
	mfn_t mfn;

	if (as != NULL && (tlmfn = xkb_as_to_mfn(xkb, as)) == MFN_INVALID)
		return (-1ULL);

	mfn = xkb_va_to_mfn(xkb, addr, tlmfn);

	if (mfn == MFN_INVALID || mfn > xkb->xkb_max_mfn)
		return (-1ULL);

	return (((uint64_t)xkb->xkb_m2p[mfn] << PAGE_SHIFT)
	    | PAGE_OFFSET(addr));
}

/*
 * Fill in *mregs with the saved register state of the given vcpu,
 * converted from the dump's vcpu_guest_context into privmregs form.
 * Returns 0 on success, or -1 with errno set to EINVAL if cpu is out
 * of range.
 */
static int
xkb_getmregs(xkb_t *xkb, uint_t cpu, struct privmregs *mregs)
{
	struct vcpu_guest_context *vcpu;
	struct cpu_user_regs *ur;
	struct regs *regs;

	if (cpu >= xkb->xkb_nr_vcpus) {
		errno = EINVAL;
		return (-1);
	}

	bzero(mregs, sizeof (*mregs));

	vcpu = xkb->xkb_vcpus[cpu];
	ur = &vcpu->user_regs;
	regs = &mregs->pm_gregs;

	regs->r_ss = ur->ss;
	regs->r_cs = ur->cs;
	regs->r_ds = ur->ds;
	regs->r_es = ur->es;
	regs->r_fs = ur->fs;
	regs->r_gs = ur->gs;
	regs->r_trapno = ur->entry_vector;
	regs->r_err = ur->error_code;
#ifdef __amd64
	regs->r_savfp = ur->rbp;
	regs->r_savpc = ur->rip;
	regs->r_rdi = ur->rdi;
	regs->r_rsi = ur->rsi;
	regs->r_rdx = ur->rdx;
	regs->r_rcx = ur->rcx;
	regs->r_r8 = ur->r8;
	regs->r_r9 = ur->r9;
	regs->r_rax = ur->rax;
	regs->r_rbx = ur->rbx;
	regs->r_rbp = ur->rbp;
	regs->r_r10 = ur->r10;
	regs->r_r11 = ur->r11;
	regs->r_r12 = ur->r12;
	regs->r_r13 = ur->r13;
	regs->r_r14 = ur->r14;
	regs->r_r15 = ur->r15;
	regs->r_rip = ur->rip;
	regs->r_rfl = ur->rflags;
	regs->r_rsp = ur->rsp;
#else
	regs->r_savfp = ur->ebp;
	regs->r_savpc = ur->eip;
	regs->r_edi = ur->edi;
	regs->r_esi = ur->esi;
	regs->r_ebp = ur->ebp;
	regs->r_esp = ur->esp;
	regs->r_ebx = ur->ebx;
	regs->r_edx = ur->edx;
	regs->r_ecx = ur->ecx;
	regs->r_eax = ur->eax;
	regs->r_eip = ur->eip;
	regs->r_efl = ur->eflags;
	regs->r_uesp = 0;
#endif

	/* The vcpu context carries 8 control and 8 debug registers. */
	bcopy(&vcpu->ctrlreg, &mregs->pm_cr, 8 * sizeof (ulong_t));
	bcopy(&vcpu->debugreg, &mregs->pm_dr, 8 * sizeof (ulong_t));

	mregs->pm_flags = PM_GREGS | PM_CRREGS | PM_DRREGS;

	return (0);
}

/*
 * Backend ops vector handed to mdb's kvm target.  All write operations
 * are unsupported (dumps are read-only).
 */
static mdb_kb_ops_t xpv_kb_ops = {
	.kb_open = (void *(*)())xkb_open,
	.kb_close = (int (*)())xkb_close,
	.kb_sym_io = (mdb_io_t *(*)())xkb_sym_io,
	.kb_kread = (ssize_t (*)())xkb_read,
	.kb_kwrite = (ssize_t (*)())mdb_tgt_notsup,
	.kb_aread = (ssize_t (*)())xkb_aread,
	.kb_awrite = (ssize_t (*)())mdb_tgt_notsup,
	.kb_pread = (ssize_t (*)())xkb_pread,
	.kb_pwrite = (ssize_t (*)())mdb_tgt_notsup,
	.kb_vtop = (uint64_t (*)())xkb_vtop,
	.kb_getmregs = (int (*)())xkb_getmregs
};

/* Entry point by which mdb obtains this backend's ops vector. */
mdb_kb_ops_t *
mdb_kb_ops(void)
{
	return (&xpv_kb_ops);
}

/* This module provides no dcmds or walkers of its own. */
static const mdb_dcmd_t dcmds[] = { NULL, };
static const mdb_walker_t walkers[] = { NULL, };
static const mdb_modinfo_t modinfo = { MDB_API_VERSION, dcmds, walkers };

const mdb_modinfo_t *
_mdb_init(void)
{
	return (&modinfo);
}

void
_mdb_fini(void)
{
}